[
  {
    "path": ".github/ISSUE_TEMPLATE/issue-template.md",
    "content": "---\nname: Issue template\nabout: Use this template for reporting your problem\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n**System information**\n* Have I written custom code:\n* OS Platform(e.g., window10 or Linux Ubuntu 16.04):\n* Python version:\n* Deep learning framework and version(e.g., Tensorflow2.1 or Pytorch1.3):\n* Use GPU or not:\n* CUDA/cuDNN version(if you use GPU):\n* The network you trained(e.g., Resnet34 network):\n\n**Describe the current behavior**\n\n**Error info / logs**\n"
  },
  {
    "path": ".gitignore",
    "content": "##ignore this file##\n*.idea\n__pycache__\n*.zip\nflower_data\n*.h5\n*.pth\n*.pt\n*.jpg\n*.ckpt.*\n*.ckpt\n*.config\n*.gz\n*.onnx\n*.xml\n*.bin\n*.mapping\n*.csv\ncheckpoint\ndata\nVOCdevkit\nssd_resnet50_v1_fpn_shared_box_predictor\nruns\n"
  },
  {
    "path": "LICENSE",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU General Public License is a free, copyleft license for\nsoftware and other kinds of works.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nthe GNU General Public License is intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.  We, the Free Software Foundation, use the\nGNU General Public License for most of our software; it applies also to\nany other work released this way by its authors.  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  To protect your rights, we need to prevent others from denying you\nthese rights or asking you to surrender the rights.  Therefore, you have\ncertain responsibilities if you distribute copies of the software, or if\nyou modify it: responsibilities to respect the freedom of others.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must pass on to the recipients the same\nfreedoms that you received.  You must make sure that they, too, receive\nor can get the source code.  
And you must show them these terms so they\nknow their rights.\n\n  Developers that use the GNU GPL protect your rights with two steps:\n(1) assert copyright on the software, and (2) offer you this License\ngiving you legal permission to copy, distribute and/or modify it.\n\n  For the developers' and authors' protection, the GPL clearly explains\nthat there is no warranty for this free software.  For both users' and\nauthors' sake, the GPL requires that modified versions be marked as\nchanged, so that their problems will not be attributed erroneously to\nauthors of previous versions.\n\n  Some devices are designed to deny users access to install or run\nmodified versions of the software inside them, although the manufacturer\ncan do so.  This is fundamentally incompatible with the aim of\nprotecting users' freedom to change the software.  The systematic\npattern of such abuse occurs in the area of products for individuals to\nuse, which is precisely where it is most unacceptable.  Therefore, we\nhave designed this version of the GPL to prohibit the practice for those\nproducts.  If such problems arise substantially in other domains, we\nstand ready to extend this provision to those domains in future versions\nof the GPL, as needed to protect the freedom of users.\n\n  Finally, every program is threatened constantly by software patents.\nStates should not allow patents to restrict development and use of\nsoftware on general-purpose computers, but in those that do, we wish to\navoid the special danger that patents applied to a free program could\nmake it effectively proprietary.  To prevent this, the GPL assures that\npatents cannot be used to render the program non-free.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. 
Definitions.\n\n  \"This License\" refers to version 3 of the GNU General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. 
Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  \"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  
For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. 
Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  
This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  
If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  
Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  
If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  
If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  
For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  
To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. Use with the GNU Affero General Public License.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU Affero General Public License into a single\ncombined work, and to convey the resulting work.  
The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the special requirements of the GNU Affero General Public License,\nsection 13, concerning interaction through a network will apply to the\ncombination as such.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  If the Program does not specify a version number of the\nGNU General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  
It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program.  If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If the program does terminal interaction, make it output a short\nnotice like this when it starts in an interactive mode:\n\n    <program>  Copyright (C) <year>  <name of author>\n    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  
Of course, your program's commands\nmight be different; for a GUI interface, you would use an \"about box\".\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU GPL, see\n<https://www.gnu.org/licenses/>.\n\n  The GNU General Public License does not permit incorporating your program\ninto proprietary programs.  If your program is a subroutine library, you\nmay consider it more useful to permit linking proprietary applications with\nthe library.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.  But first, please read\n<https://www.gnu.org/licenses/why-not-lgpl.html>.\n"
  },
  {
    "path": "README.md",
    "content": "# 深度学习在图像处理中的应用教程\n\n## 前言\n* 本教程是对本人研究生期间的研究内容进行整理总结，总结的同时也希望能够帮助更多的小伙伴。后期如果有学习到新的知识也会与大家一起分享。\n* 本教程会以视频的方式进行分享，教学流程如下：  \n1）介绍网络的结构与创新点  \n2）使用Pytorch进行网络的搭建与训练  \n3）使用Tensorflow（内部的keras模块）进行网络的搭建与训练 \n* 课程中所有PPT都放在`course_ppt`文件夹下，需要的自行下载。\n\n\n## 教程目录，点击跳转相应视频（后期会根据学习内容增加）\n\n* 图像分类\n  * LeNet（已完成）\n    * [Pytorch官方demo(Lenet)](https://www.bilibili.com/video/BV187411T7Ye)\n    * [Tensorflow2官方demo](https://www.bilibili.com/video/BV1n7411T7o6)\n\n  * AlexNet（已完成）\n    * [AlexNet网络讲解](https://www.bilibili.com/video/BV1p7411T7Pc)\n    * [Pytorch搭建AlexNet](https://www.bilibili.com/video/BV1W7411T7qc)\n    * [Tensorflow2搭建Alexnet](https://www.bilibili.com/video/BV1s7411T7vs)\n\n  * VggNet（已完成）\n    * [VggNet网络讲解](https://www.bilibili.com/video/BV1q7411T7Y6)\n    * [Pytorch搭建VGG网络](https://www.bilibili.com/video/BV1i7411T7ZN)\n    * [Tensorflow2搭建VGG网络](https://www.bilibili.com/video/BV1q7411T76b)\n\n  * GoogLeNet（已完成）\n    * [GoogLeNet网络讲解](https://www.bilibili.com/video/BV1z7411T7ie)\n    * [Pytorch搭建GoogLeNet网络](https://www.bilibili.com/video/BV1r7411T7M5)\n    * [Tensorflow2搭建GoogLeNet网络](https://www.bilibili.com/video/BV1a7411T7Ht)\n\n  * ResNet（已完成）\n    * [ResNet网络讲解](https://www.bilibili.com/video/BV1T7411T7wa)\n    * [Pytorch搭建ResNet网络](https://www.bilibili.com/video/BV14E411H7Uw)\n    * [Tensorflow2搭建ResNet网络](https://www.bilibili.com/video/BV1WE41177Ya)\n\n  * ResNeXt (已完成)\n    * [ResNeXt网络讲解](https://www.bilibili.com/video/BV1Ap4y1p71v/)\n    * [Pytorch搭建ResNeXt网络](https://www.bilibili.com/video/BV1rX4y1N7tE)\n\n  * MobileNet_V1_V2（已完成）\n    * [MobileNet_V1_V2网络讲解](https://www.bilibili.com/video/BV1yE411p7L7)\n    * [Pytorch搭建MobileNetV2网络](https://www.bilibili.com/video/BV1qE411T7qZ)\n    * [Tensorflow2搭建MobileNetV2网络](https://www.bilibili.com/video/BV1NE411K7tX)\n\n  * MobileNet_V3（已完成）\n    * [MobileNet_V3网络讲解](https://www.bilibili.com/video/BV1GK4y1p7uE)\n    * [Pytorch搭建MobileNetV3网络](https://www.bilibili.com/video/BV1zT4y1P7pd)\n  
  * [Tensorflow2搭建MobileNetV3网络](https://www.bilibili.com/video/BV1KA411g7wX)\n\n  * ShuffleNet_V1_V2 (已完成)\n    * [ShuffleNet_V1_V2网络讲解](https://www.bilibili.com/video/BV15y4y1Y7SY)\n    * [使用Pytorch搭建ShuffleNetV2](https://www.bilibili.com/video/BV1dh411r76X)\n    * [使用Tensorflow2搭建ShuffleNetV2](https://www.bilibili.com/video/BV1kr4y1N7bh)\n\n  * EfficientNet_V1（已完成）\n    * [EfficientNet网络讲解](https://www.bilibili.com/video/BV1XK4y1U7PX)\n    * [使用Pytorch搭建EfficientNet](https://www.bilibili.com/video/BV19z4y1179h/)\n    * [使用Tensorflow2搭建EfficientNet](https://www.bilibili.com/video/BV1PK4y1S7Jf)\n\n  * EfficientNet_V2 (已完成)\n    * [EfficientNetV2网络讲解](https://www.bilibili.com/video/BV19v41157AU)\n    * [使用Pytorch搭建EfficientNetV2](https://www.bilibili.com/video/BV1Xy4y1g74u)\n    * [使用Tensorflow搭建EfficientNetV2](https://www.bilibili.com/video/BV19K4y1g7m4)\n  \n  * RepVGG（已完成）\n    * [RepVGG网络讲解](https://www.bilibili.com/video/BV15f4y1o7QR)\n\n  * Vision Transformer(已完成)\n    * [Multi-Head Attention讲解](https://www.bilibili.com/video/BV15v411W78M)\n    * [Vision Transformer网络讲解](https://www.bilibili.com/video/BV1Jh411Y7WQ)\n    * [使用Pytorch搭建Vision Transformer](https://www.bilibili.com/video/BV1AL411W7dT)\n    * [使用tensorflow2搭建Vision Transformer](https://www.bilibili.com/video/BV1q64y1X7GY)\n\n  * Swin Transformer(已完成)\n    * [Swin Transformer网络讲解](https://www.bilibili.com/video/BV1pL4y1v7jC)\n    * [使用Pytorch搭建Swin Transformer](https://www.bilibili.com/video/BV1yg411K7Yc)\n    * [使用Tensorflow2搭建Swin Transformer](https://www.bilibili.com/video/BV1bR4y1t7qT)\n\n  * ConvNeXt(已完成)\n    * [ConvNeXt网络讲解](https://www.bilibili.com/video/BV1SS4y157fu)\n    * [使用Pytorch搭建ConvNeXt](https://www.bilibili.com/video/BV14S4y1L791)\n    * [使用Tensorflow2搭建ConvNeXt](https://www.bilibili.com/video/BV1TS4y1V7Gz)\n\n  * MobileViT(已完成)\n    * [MobileViT网络讲解](https://www.bilibili.com/video/BV1TG41137sb)\n    * [使用Pytorch搭建MobileViT](https://www.bilibili.com/video/BV1ae411L7Ki)\n\n* 目标检测\n 
 * Faster-RCNN/FPN（已完成）\n    * [Faster-RCNN网络讲解](https://www.bilibili.com/video/BV1af4y1m7iL)\n    * [FPN网络讲解](https://www.bilibili.com/video/BV1dh411U7D9)\n    * [Faster-RCNN源码解析(Pytorch)](https://www.bilibili.com/video/BV1of4y1m7nj)\n\n  * SSD/RetinaNet (已完成)\n    * [SSD网络讲解](https://www.bilibili.com/video/BV1fT4y1L7Gi)\n    * [RetinaNet网络讲解](https://www.bilibili.com/video/BV1Q54y1L7sM)\n    * [SSD源码解析(Pytorch)](https://www.bilibili.com/video/BV1vK411H771)\n\n  * YOLO Series (已完成)\n    * [YOLO系列网络讲解(V1~V3)](https://www.bilibili.com/video/BV1yi4y1g7ro)\n    * [YOLOv3 SPP源码解析(Pytorch版)](https://www.bilibili.com/video/BV1t54y1C7ra)\n    * [YOLOV4网络讲解](https://www.bilibili.com/video/BV1NF41147So)\n    * [YOLOV5网络讲解](https://www.bilibili.com/video/BV1T3411p7zR)\n    * [YOLOX 网络讲解](https://www.bilibili.com/video/BV1JW4y1k76c)\n  \n  * FCOS（已完成）\n    * [FCOS网络讲解](https://www.bilibili.com/video/BV1G5411X7jw)\n\n* 语义分割 \n  * FCN (已完成)\n    * [FCN网络讲解](https://www.bilibili.com/video/BV1J3411C7zd)\n    * [FCN源码解析(Pytorch版)](https://www.bilibili.com/video/BV19q4y1971Q)\n\n  * DeepLabV3 (已完成)\n    * [DeepLabV1网络讲解](https://www.bilibili.com/video/BV1SU4y1N7Ao)\n    * [DeepLabV2网络讲解](https://www.bilibili.com/video/BV1gP4y1G7TC)\n    * [DeepLabV3网络讲解](https://www.bilibili.com/video/BV1Jb4y1q7j7)\n    * [DeepLabV3源码解析(Pytorch版)](https://www.bilibili.com/video/BV1TD4y1c7Wx)\n\n  * LR-ASPP (已完成)\n    * [LR-ASPP网络讲解](https://www.bilibili.com/video/BV1LS4y1M76E)\n    * [LR-ASPP源码解析(Pytorch版)](https://www.bilibili.com/video/bv13D4y1F7ML)\n  \n  * U-Net (已完成)\n    * [U-Net网络讲解](https://www.bilibili.com/video/BV1Vq4y127fB/)\n    * [U-Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Vq4y127fB)\n  \n  * U2Net (已完成)\n    * [U2Net网络讲解](https://www.bilibili.com/video/BV1yB4y1z7mj)\n    * [U2Net源码解析(Pytorch版)](https://www.bilibili.com/video/BV1Kt4y137iS)\n\n* 实例分割\n  * Mask R-CNN（已完成）\n    * [Mask R-CNN网络讲解](https://www.bilibili.com/video/BV1ZY411774T)\n    * [Mask 
R-CNN源码解析(Pytorch版)](https://www.bilibili.com/video/BV1hY411E7wD)\n\n* 关键点检测\n  * DeepPose（已完成）\n    * [DeepPose网络讲解](https://www.bilibili.com/video/BV1bm421g7aJ)\n    * [DeepPose源码解析(Pytorch版)](https://www.bilibili.com/video/BV1bm421g7aJ)\n\n  * HRNet（已完成）\n    * [HRNet网络讲解](https://www.bilibili.com/video/BV1bB4y1y7qP)\n    * [HRNet源码解析(Pytorch版)](https://www.bilibili.com/video/BV1ar4y157JM)\n\n**[更多相关视频请进入我的bilibili频道查看](https://space.bilibili.com/18161609/channel/index)**\n\n---\n\n欢迎大家关注下我的微信公众号（**阿喆学习小记**），平时会总结些相关学习博文。    \n\n如果有什么问题，也可以到我的CSDN中一起讨论。\n[https://blog.csdn.net/qq_37541097/article/details/103482003](https://blog.csdn.net/qq_37541097/article/details/103482003)\n\n我的bilibili频道：\n[https://space.bilibili.com/18161609/channel/index](https://space.bilibili.com/18161609/channel/index)\n"
  },
  {
    "path": "article_link/README.md",
    "content": "# 文献链接\n\n## 图像分类(Classification)\n- LeNet [http://yann.lecun.com/exdb/lenet/index.html](http://yann.lecun.com/exdb/lenet/index.html)\n- AlexNet [http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)\n- ZFNet(Visualizing and Understanding Convolutional Networks) [https://arxiv.org/abs/1311.2901](https://arxiv.org/abs/1311.2901)\n- VGG [https://arxiv.org/abs/1409.1556](https://arxiv.org/abs/1409.1556)\n- GoogLeNet, Inceptionv1(Going deeper with convolutions) [https://arxiv.org/abs/1409.4842](https://arxiv.org/abs/1409.4842)\n- Batch Normalization [https://arxiv.org/abs/1502.03167](https://arxiv.org/abs/1502.03167)\n- Inceptionv3(Rethinking the Inception Architecture for Computer Vision) [https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567)\n- Inceptionv4, Inception-ResNet [https://arxiv.org/abs/1602.07261](https://arxiv.org/abs/1602.07261)\n- Xception(Deep Learning with Depthwise Separable Convolutions) [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357)\n- ResNet [https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385)\n- ResNeXt [https://arxiv.org/abs/1611.05431](https://arxiv.org/abs/1611.05431)\n- DenseNet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)\n- NASNet-A(Learning Transferable Architectures for Scalable Image Recognition) [https://arxiv.org/abs/1707.07012](https://arxiv.org/abs/1707.07012)\n- SENet(Squeeze-and-Excitation Networks) [https://arxiv.org/abs/1709.01507](https://arxiv.org/abs/1709.01507)\n- MobileNet(v1) [https://arxiv.org/abs/1704.04861](https://arxiv.org/abs/1704.04861)\n- MobileNet(v2) [https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381)\n- MobileNet(v3) [https://arxiv.org/abs/1905.02244](https://arxiv.org/abs/1905.02244)\n- ShuffleNet(v1) 
[https://arxiv.org/abs/1707.01083](https://arxiv.org/abs/1707.01083)\n- ShuffleNet(v2) [https://arxiv.org/abs/1807.11164](https://arxiv.org/abs/1807.11164)\n- Bag of Tricks for Image Classification with Convolutional Neural Networks [https://arxiv.org/abs/1812.01187](https://arxiv.org/abs/1812.01187)\n- EfficientNet(v1) [https://arxiv.org/abs/1905.11946](https://arxiv.org/abs/1905.11946)\n- EfficientNet(v2) [https://arxiv.org/abs/2104.00298](https://arxiv.org/abs/2104.00298)\n- CSPNet [https://arxiv.org/abs/1911.11929](https://arxiv.org/abs/1911.11929)\n- RegNet [https://arxiv.org/abs/2003.13678](https://arxiv.org/abs/2003.13678)\n- NFNets(High-Performance Large-Scale Image Recognition Without Normalization) [https://arxiv.org/abs/2102.06171](https://arxiv.org/abs/2102.06171)\n- Vision Transformer [https://arxiv.org/abs/2010.11929](https://arxiv.org/abs/2010.11929)\n- DeiT(Training data-efficient image transformers ) [https://arxiv.org/abs/2012.12877](https://arxiv.org/abs/2012.12877)\n- Swin Transformer [https://arxiv.org/abs/2103.14030](https://arxiv.org/abs/2103.14030)\n- Swin Transformer V2: Scaling Up Capacity and Resolution [https://arxiv.org/abs/2111.09883](https://arxiv.org/abs/2111.09883)\n- BEiT: BERT Pre-Training of Image Transformers [https://arxiv.org/abs/2106.08254](https://arxiv.org/abs/2106.08254)\n- MAE(Masked Autoencoders Are Scalable Vision Learners) [https://arxiv.org/abs/2111.06377](https://arxiv.org/abs/2111.06377)\n- ConvNeXt(A ConvNet for the 2020s) [https://arxiv.org/abs/2201.03545](https://arxiv.org/abs/2201.03545)\n- MobileViT V1 [https://arxiv.org/abs/2110.02178](https://arxiv.org/abs/2110.02178)\n- MobileViT V2(Separable Self-attention for Mobile Vision Transformers) [https://arxiv.org/abs/2206.02680](https://arxiv.org/abs/2206.02680)\n- MobileOne(An Improved One millisecond Mobile Backbone) [https://arxiv.org/abs/2206.04040](https://arxiv.org/abs/2206.04040)\n\n\n## 目标检测(Object Detection)\n- R-CNN 
[https://arxiv.org/abs/1311.2524](https://arxiv.org/abs/1311.2524)\n- Fast R-CNN [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083)\n- Faster R-CNN [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497)\n- Cascade R-CNN: Delving into High Quality Object Detection [https://arxiv.org/abs/1712.00726](https://arxiv.org/abs/1712.00726)\n- Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870)\n- SSD [https://arxiv.org/abs/1512.02325](https://arxiv.org/abs/1512.02325)\n- FPN(Feature Pyramid Networks for Object Detection) [https://arxiv.org/abs/1612.03144](https://arxiv.org/abs/1612.03144)\n- RetinaNet(Focal Loss for Dense Object Detection) [https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002)\n- Bag of Freebies for Training Object Detection Neural Networks [https://arxiv.org/abs/1902.04103](https://arxiv.org/abs/1902.04103)\n- YOLOv1 [https://arxiv.org/abs/1506.02640](https://arxiv.org/abs/1506.02640)\n- YOLOv2 [https://arxiv.org/abs/1612.08242](https://arxiv.org/abs/1612.08242)\n- YOLOv3 [https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767)\n- YOLOv4 [https://arxiv.org/abs/2004.10934](https://arxiv.org/abs/2004.10934)\n- YOLOX(Exceeding YOLO Series in 2021) [https://arxiv.org/abs/2107.08430](https://arxiv.org/abs/2107.08430)\n- YOLOv7 [https://arxiv.org/abs/2207.02696](https://arxiv.org/abs/2207.02696)\n- PP-YOLO [https://arxiv.org/abs/2007.12099](https://arxiv.org/abs/2007.12099)\n- PP-YOLOv2 [https://arxiv.org/abs/2104.10419](https://arxiv.org/abs/2104.10419)\n- CornerNet [https://arxiv.org/abs/1808.01244](https://arxiv.org/abs/1808.01244)\n- FCOS(Old) [https://arxiv.org/abs/1904.01355](https://arxiv.org/abs/1904.01355)\n- FCOS(New) [https://arxiv.org/abs/2006.09214](https://arxiv.org/abs/2006.09214)\n- CenterNet [https://arxiv.org/abs/1904.07850](https://arxiv.org/abs/1904.07850)\n\n\n## 语义分割(Semantic Segmentation)\n- FCN(Fully Convolutional Networks for Semantic Segmentation) 
[https://arxiv.org/abs/1411.4038](https://arxiv.org/abs/1411.4038)\n- UNet(U-Net: Convolutional Networks for Biomedical Image Segmentation) [https://arxiv.org/abs/1505.04597](https://arxiv.org/abs/1505.04597)\n- DeepLabv1(Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs) [https://arxiv.org/abs/1412.7062](https://arxiv.org/abs/1412.7062)\n- DeepLabv2(Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs) [https://arxiv.org/abs/1606.00915](https://arxiv.org/abs/1606.00915)\n- DeepLabv3(Rethinking Atrous Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1706.05587](https://arxiv.org/abs/1706.05587)\n- DeepLabv3+(Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation) [https://arxiv.org/abs/1802.02611](https://arxiv.org/abs/1802.02611)\n- SegFormer [https://arxiv.org/abs/2105.15203](https://arxiv.org/abs/2105.15203)\n\n\n## 显著性目标检测(Salient Object Detection)\n- U2Net [https://arxiv.org/abs/2005.09007](https://arxiv.org/abs/2005.09007)\n\n\n## 实例分割(Instance Segmentation)\n- Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870)\n\n\n## 关键点检测(Keypoint Detection)\n- HRNet(Deep High-Resolution Representation Learning for Human Pose Estimation) [https://arxiv.org/abs/1902.09212](https://arxiv.org/abs/1902.09212)\n\n## 网络量化(Quantization)\n- Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference [https://arxiv.org/abs/1712.05877](https://arxiv.org/abs/1712.05877)\n- Quantizing deep convolutional networks for efficient inference: A whitepaper [https://arxiv.org/abs/1806.08342](https://arxiv.org/abs/1806.08342)\n- Data-Free Quantization Through Weight Equalization and Bias Correction [https://arxiv.org/abs/1906.04721](https://arxiv.org/abs/1906.04721)\n- LSQ: Learned Step Size Quantization [https://arxiv.org/abs/1902.08153](https://arxiv.org/abs/1902.08153)\n- LSQ+: Improving low-bit 
quantization through learnable offsets and better initialization [https://arxiv.org/abs/2004.09576](https://arxiv.org/abs/2004.09576)\n\n\n\n## 自然语言处理\n- Attention Is All You Need [https://arxiv.org/abs/1706.03762](https://arxiv.org/abs/1706.03762)\n\n## Others\n- Microsoft COCO: Common Objects in Context [https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312)\n- The PASCAL Visual Object Classes Challenge: A Retrospective [http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf)\n- Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization [https://arxiv.org/abs/1610.02391](https://arxiv.org/abs/1610.02391)\n"
  },
  {
    "path": "course_ppt/README.md",
    "content": "# 为了精简项目，课程中的所有ppt都已转存至百度云\n\n**所有PPT都放在该文件夹中** 链接：https://pan.baidu.com/s/1VL6QTQ86sfY2aMDVo4Z-kg 提取码：4ydw\n\n**下面为单独每个ppt的链接**：\n## 分类网络相关\n- **AlexNet** 链接: https://pan.baidu.com/s/1RJn5lzY8LwrmckUPvXcjmg  提取码: 34ue\n- **VGG** 链接: https://pan.baidu.com/s/1BnYpdaDwAIcgRm7YwakEZw  提取码: 8ev0\n- **GoogleNet** 链接: https://pan.baidu.com/s/1XjZXprvayV3dDMvLjoOk3A  提取码: 9hq4\n- **ResNet** 链接: https://pan.baidu.com/s/1I2LUlwCSjNKr37T0n3NKzg  提取码: f1s9\n- **ResNext** 链接：https://pan.baidu.com/s/1-anFYX5572MJmiQym9D4Eg 提取码：f8ob \n- **MobileNet_v1_v2** 链接: https://pan.baidu.com/s/1ReDDCuK8wyH0XqniUgiSYQ  提取码: ipqv\n- **MobileNet_v3**  链接：https://pan.baidu.com/s/13mzSpyxuA4T4ki7kEN1Xqw 提取码：fp5g \n- **ShuffleNet_v1_v2** 链接：https://pan.baidu.com/s/1-DDwePMPCDvjw08YU8nAAA 提取码：ad6n\n- **EfficientNet_v1** 链接：https://pan.baidu.com/s/1Sep9W0vLzfjhcHAXr6Bv0Q  提取码：eufl \n- **EfficientNet_v2** 链接：https://pan.baidu.com/s/1tesrgY4CHLmq6P7s7TcHCw  提取码：y2kz\n- **Transformer** 链接：https://pan.baidu.com/s/1DE6RDySr7NS0HQ35gBqP_g 提取码：y9e7\n- **Vision Transformer** 链接：https://pan.baidu.com/s/1wzpHG8EK5gxg6UCMscYqMw 提取码：cm1m\n- **Swin Transformer** 链接：https://pan.baidu.com/s/1O6XEEZUb6B6AGYON7-EOgA 提取码：qkrn\n- **ConvNeXt** 链接：https://pan.baidu.com/s/1mgZjkirJPZ8huVls-O0xXA  提取码：kvqx\n- **RepVGG** 链接：https://pan.baidu.com/s/1uJP3hCHI79-tUdBNR_VAWQ  提取码：qe8a\n- **MobileViT** 链接：https://pan.baidu.com/s/1F8QJtFhTPWX8Vjr8_97scQ  提取码：lfn5\n- **ConfusionMatrix** 链接: https://pan.baidu.com/s/1EtKzHkZyv2XssYtqmGYCLg  提取码: uoo5\n- **Grad-CAM** 链接：https://pan.baidu.com/s/1ZHKBW7hINQXFI36hBYdC0Q  提取码：aru7\n\n\n## 目标检测网络相关\n- **R-CNN** 链接: https://pan.baidu.com/s/1l_ZxkfJdyp3KoMLqwWbx5A  提取码: nm1l\n- **Fast R-CNN** 链接: https://pan.baidu.com/s/1Pe_Tg43OVo-yZWj7t-_L6Q  提取码: fe73\n- **Faster R-CNN** 链接：https://pan.baidu.com/s/1Dd0d_LY8l7Y1YkHQhp-WfA  提取码：vzp4\n- **FPN** 链接：https://pan.baidu.com/s/1O9H0iqQMg9f_FZezUEKZ9g 提取码：qbl8 \n- **SSD** 链接: https://pan.baidu.com/s/15zF3GhIdg-E_tZX2Y2X-rw  
提取码: u7k1\n- **RetinaNet**  链接：https://pan.baidu.com/s/1beW612VCSnSu-v8iu_2-fA 提取码：vqbu \n- **YOLOv1** 链接: https://pan.baidu.com/s/1vVyUNQHYEGjqosezlx_1Mg  提取码: b3i0\n- **YOLOv2** 链接: https://pan.baidu.com/s/132aW1e_NYbaxxGi3cDVLYg  提取码: tak7\n- **YOLOv3** 链接：https://pan.baidu.com/s/1hZqdgh7wA7QeGAYTttlVOQ  提取码：5ulo\n- **YOLOv3SPP** 链接: https://pan.baidu.com/s/15LRssnPez9pn6jRpW89Wlw  提取码: nv9f\n- **YOLOv4** 链接：https://pan.baidu.com/s/1Ltw4v1pg0eZNFYR2ZBbZmQ  提取码：qjx4\n- **YOLOv5** 链接：https://pan.baidu.com/s/1rnvjwHLvOlJ9KpJ5z95GWw  提取码：kt04\n- **YOLOX** 链接：https://pan.baidu.com/s/1ex54twQC7hBE3szNko_K5A  提取码：al0r\n- **FCOS** 链接: https://pan.baidu.com/s/1KUc9dzvAbtwtGGm3ZZy_cw  提取码: h0as\n- **Calculate mAP** 链接: https://pan.baidu.com/s/1jdA_n78J7nSUoOg6TTO5Bg  提取码: eh62\n- **coco数据集简介** 链接：https://pan.baidu.com/s/1HfCvjt-8o9j5a916IYNVjw  提取码：6rec \n\n\n## 图像分割网络相关\n- **语义分割前言** 链接：https://pan.baidu.com/s/1cwxe2wbaA_2DqNYADq3myA 提取码：zzij\n- **转置卷积** 链接：https://pan.baidu.com/s/1A8688168fuWHyxJQtzupHw 提取码：pgnf\n- **FCN** 链接：https://pan.baidu.com/s/1XLUneTLrdUyDAiV6kqi9rw 提取码：126a\n- **膨胀卷积** 链接：https://pan.baidu.com/s/1QlQyniuMhBeXyEK420MIdQ 提取码：ry6p\n- **DeepLab V1** 链接：https://pan.baidu.com/s/1NFxb7ADQOMVYLxmIKqTONQ  提取码：500s\n- **DeepLab V2** 链接：https://pan.baidu.com/s/1woe3lJYBVkOdnn6XXlKf8g 提取码：76ec\n- **DeepLab V3** 链接：https://pan.baidu.com/s/1WVBgc2Ld13D0_dkHGwhTpA 提取码：m54m\n- **UNet** 链接: https://pan.baidu.com/s/1WDwI-DuzYklMvwyRxVUXjA 提取码: rd4j\n- **U2Net**  链接：https://pan.baidu.com/s/1ekbEm4dsjlFamK8dCs8yfA  提取码：472j\n\n\n## 实例分割\n- **Mask R-CNN** 链接：https://pan.baidu.com/s/1JpQ7ENEv_x9A1-O_NpjwYA 提取码：1t4i\n\n## 关键点检测\n- **HRNet** 链接: https://pan.baidu.com/s/1-8AJdU82K1j70KZK_rN7aQ  提取码: t4me\n\n"
  },
  {
    "path": "data_set/README.md",
    "content": "## 该文件夹是用来存放训练数据的目录\n### 使用步骤如下：\n* （1）在data_set文件夹下创建新文件夹\"flower_data\"\n* （2）点击链接下载花分类数据集 [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz)\n* （3）解压数据集到flower_data文件夹下\n* （4）执行\"split_data.py\"脚本自动将数据集划分成训练集train和验证集val    \n\n```\n├── flower_data   \n       ├── flower_photos（解压的数据集文件夹，3670个样本）  \n       ├── train（生成的训练集，3306个样本）  \n       └── val（生成的验证集，364个样本） \n```\n"
  },
  {
    "path": "data_set/split_data.py",
    "content": "import os\nfrom shutil import copy, rmtree\nimport random\n\n\ndef mk_file(file_path: str):\n    if os.path.exists(file_path):\n        # 如果文件夹存在，则先删除原文件夹在重新创建\n        rmtree(file_path)\n    os.makedirs(file_path)\n\n\ndef main():\n    # 保证随机可复现\n    random.seed(0)\n\n    # 将数据集中10%的数据划分到验证集中\n    split_rate = 0.1\n\n    # 指向你解压后的flower_photos文件夹\n    cwd = os.getcwd()\n    data_root = os.path.join(cwd, \"flower_data\")\n    origin_flower_path = os.path.join(data_root, \"flower_photos\")\n    assert os.path.exists(origin_flower_path), \"path '{}' does not exist.\".format(origin_flower_path)\n\n    flower_class = [cla for cla in os.listdir(origin_flower_path)\n                    if os.path.isdir(os.path.join(origin_flower_path, cla))]\n\n    # 建立保存训练集的文件夹\n    train_root = os.path.join(data_root, \"train\")\n    mk_file(train_root)\n    for cla in flower_class:\n        # 建立每个类别对应的文件夹\n        mk_file(os.path.join(train_root, cla))\n\n    # 建立保存验证集的文件夹\n    val_root = os.path.join(data_root, \"val\")\n    mk_file(val_root)\n    for cla in flower_class:\n        # 建立每个类别对应的文件夹\n        mk_file(os.path.join(val_root, cla))\n\n    for cla in flower_class:\n        cla_path = os.path.join(origin_flower_path, cla)\n        images = os.listdir(cla_path)\n        num = len(images)\n        # 随机采样验证集的索引\n        eval_index = random.sample(images, k=int(num*split_rate))\n        for index, image in enumerate(images):\n            if image in eval_index:\n                # 将分配至验证集中的文件复制到相应目录\n                image_path = os.path.join(cla_path, image)\n                new_path = os.path.join(val_root, cla)\n                copy(image_path, new_path)\n            else:\n                # 将分配至训练集中的文件复制到相应目录\n                image_path = os.path.join(cla_path, image)\n                new_path = os.path.join(train_root, cla)\n                copy(image_path, new_path)\n            print(\"\\r[{}] processing [{}/{}]\".format(cla, index+1, num), end=\"\")  # 
processing bar\n        print()\n\n    print(\"processing done!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_onnx_cls/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_onnx_cls/main.py",
    "content": "from PIL import Image\nimport torchvision.transforms as transforms\nimport torch\nimport torch.onnx\nimport onnx\nimport onnxruntime\nimport numpy as np\nfrom model import resnet34\n\ndevice = torch.device(\"cpu\")\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef main(save_path=None):\n    assert isinstance(save_path, str), \"lack of save_path parameter...\"\n    # create model\n    model = resnet34(num_classes=5)\n    # load model weights\n    model_weight_path = \"./resNet34.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    # input to the model\n    # [batch, channel, height, width]\n    x = torch.rand(1, 3, 224, 224, requires_grad=True)\n    torch_out = model(x)\n\n    # export the model\n    torch.onnx.export(model,                       # model being run\n                      x,                           # model input (or a tuple for multiple inputs)\n                      save_path,                   # where to save the model (can be a file or file-like object)\n                      export_params=True,          # store the trained parameter weights inside the model file\n                      opset_version=10,            # the ONNX version to export the model to\n                      do_constant_folding=True,    # whether to execute constant folding for optimization\n                      input_names=[\"input\"],       # the model's input names\n                      output_names=[\"output\"],     # the model's output names\n                      dynamic_axes={\"input\": {0: \"batch_size\"},  # variable length axes\n                                    \"output\": {0: \"batch_size\"}})\n\n    # check onnx model\n    onnx_model = onnx.load(save_path)\n    onnx.checker.check_model(onnx_model)\n\n    ort_session = onnxruntime.InferenceSession(save_path)\n\n    # compute ONNX Runtime output prediction\n    
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}\n    ort_outs = ort_session.run(None, ort_inputs)\n\n    # compare ONNX Runtime and Pytorch results\n    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.\n    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with ONNXRuntime, and the result looks good!\")\n\n    # load test image\n    img = Image.open(\"../tulip.jpg\")\n\n    # pre-process\n    preprocess = transforms.Compose([transforms.Resize([224, 224]),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n    img = preprocess(img)\n    img = img.unsqueeze_(0)\n\n    # feed image into onnx model\n    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img)}\n    ort_outs = ort_session.run(None, ort_inputs)\n    prediction = ort_outs[0]\n\n    # np softmax process\n    prediction -= np.max(prediction, keepdims=True)  # 为了稳定地计算softmax概率， 一般会减掉最大元素\n    prediction = np.exp(prediction) / np.sum(np.exp(prediction), keepdims=True)\n    print(prediction)\n\n\nif __name__ == '__main__':\n    onnx_file_name = \"resnet34.onnx\"\n    main(save_path=onnx_file_name)\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_onnx_cls/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/README.md",
    "content": "本项目展示如何将Pytorch中的ResNet34网络转成Openvino的IR格式，并进行量化处理，具体使用流程如下：\n1. 按照`requirements.txt`配置环境\n2. 下载事先训练好的ResNet34权重（之前在花分类数据集上训练得到的）放在当前文件夹下。百度云链接: https://pan.baidu.com/s/1x4WFX1HynYcXLium3UaaFQ  密码: qvi6\n3. 使用`convert_pytorch2onnx.py`将Resnet34转成ONNX格式\n4. 在命令行中使用以下指令将ONNX转成IR格式：\n```\nmo  --input_model resnet34.onnx \\\n    --input_shape \"[1,3,224,224]\" \\\n    --mean_values=\"[123.675,116.28,103.53]\" \\\n    --scale_values=\"[58.395,57.12,57.375]\" \\\n    --data_type FP32 \\\n    --output_dir ir_output\n```\n5. 下载并解压花分类数据集，将`quantization_int8.py`中的`data_path`指向解压后的`flower_photos`\n6. 使用`quantization_int8.py`量化模型"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/compare_fps.py",
    "content": "import time\nimport numpy as np\nimport torch\nimport onnxruntime\nimport matplotlib.pyplot as plt\nfrom openvino.runtime import Core\nfrom torchvision.models import resnet34\n\n\ndef normalize(image: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Normalize the image to the given mean and standard deviation\n    \"\"\"\n    image = image.astype(np.float32)\n    mean = (0.485, 0.456, 0.406)\n    std = (0.229, 0.224, 0.225)\n    image /= 255.0\n    image -= mean\n    image /= std\n    return image\n\n\ndef onnx_inference(onnx_path: str, image: np.ndarray, num_images: int = 20):\n    # load onnx model\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n\n    # compute onnx Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: image}\n\n    start = time.perf_counter()\n    for _ in range(num_images):\n        ort_session.run(None, ort_inputs)\n    end = time.perf_counter()\n    time_onnx = end - start\n    print(\n        f\"ONNX model in Inference Engine/CPU: {time_onnx / num_images:.3f} \"\n        f\"seconds per image, FPS: {num_images / time_onnx:.2f}\"\n    )\n\n    return num_images / time_onnx\n\n\ndef ir_inference(ir_path: str, image: np.ndarray, num_images: int = 20):\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_path)\n    compiled_model_ir = ie.compile_model(model=model_ir, device_name=\"CPU\")\n\n    # Get input and output layers\n    input_layer_ir = next(iter(compiled_model_ir.inputs))\n    output_layer_ir = next(iter(compiled_model_ir.outputs))\n\n    start = time.perf_counter()\n    request_ir = compiled_model_ir.create_infer_request()\n    for _ in range(num_images):\n        request_ir.infer(inputs={input_layer_ir.any_name: image})\n    end = time.perf_counter()\n    time_ir = end - start\n    print(\n        f\"IR model in Inference Engine/CPU: {time_ir / num_images:.3f} \"\n        f\"seconds per image, FPS: {num_images / time_ir:.2f}\"\n    )\n\n    
return num_images / time_ir\n\n\ndef pytorch_inference(image: np.ndarray, num_images: int = 20):\n    image = torch.as_tensor(image, dtype=torch.float32)\n\n    model = resnet34(pretrained=False, num_classes=5)\n    model.eval()\n\n    with torch.no_grad():\n        start = time.perf_counter()\n        for _ in range(num_images):\n            model(image)\n        end = time.perf_counter()\n        time_torch = end - start\n\n    print(\n        f\"PyTorch model on CPU: {time_torch / num_images:.3f} seconds per image, \"\n        f\"FPS: {num_images / time_torch:.2f}\"\n    )\n\n    return num_images / time_torch\n\n\ndef plot_fps(v: dict):\n    x = list(v.keys())\n    y = list(v.values())\n\n    plt.bar(range(len(x)), y, align='center')\n    plt.xticks(range(len(x)), x)\n    for i, v in enumerate(y):\n        plt.text(x=i, y=v+0.5, s=f\"{v:.2f}\", ha='center')\n    plt.xlabel('model format')\n    plt.ylabel('fps')\n    plt.title('FPS comparison')\n    plt.show()\n    plt.savefig('fps_vs.jpg')\n\n\ndef main():\n    image_h = 224\n    image_w = 224\n    onnx_path = \"resnet34.onnx\"\n    ir_path = \"ir_output/resnet34.xml\"\n\n    image = np.random.randn(image_h, image_w, 3)\n    normalized_image = normalize(image)\n\n    # Convert the resized images to network input shape\n    # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n    input_image = np.expand_dims(np.transpose(image, (2, 0, 1)), 0)\n    normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)\n\n    onnx_fps = onnx_inference(onnx_path, normalized_input_image, num_images=100)\n    ir_fps = ir_inference(ir_path, input_image, num_images=100)\n    pytorch_fps = pytorch_inference(normalized_input_image, num_images=100)\n    plot_fps({\"pytorch\": round(pytorch_fps, 2),\n              \"onnx\": round(onnx_fps, 2),\n              \"ir\": round(ir_fps, 2)})\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/compare_onnx_and_ir.py",
    "content": "import numpy as np\nimport onnxruntime\nfrom openvino.runtime import Core\n\n\ndef normalize(image: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Normalize the image to the given mean and standard deviation\n    \"\"\"\n    image = image.astype(np.float32)\n    mean = (0.485, 0.456, 0.406)\n    std = (0.229, 0.224, 0.225)\n    image /= 255.0\n    image -= mean\n    image /= std\n    return image\n\n\ndef onnx_inference(onnx_path: str, image: np.ndarray):\n    # load onnx model\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n\n    # compute onnx Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: image}\n    res_onnx = ort_session.run(None, ort_inputs)[0]\n    return res_onnx\n\n\ndef ir_inference(ir_path: str, image: np.ndarray):\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_path)\n    compiled_model_ir = ie.compile_model(model=model_ir, device_name=\"CPU\")\n\n    # Get input and output layers\n    input_layer_ir = next(iter(compiled_model_ir.inputs))\n    output_layer_ir = next(iter(compiled_model_ir.outputs))\n\n    # Run inference on the input image\n    res_ir = compiled_model_ir([image])[output_layer_ir]\n    return res_ir\n\n\ndef main():\n    image_h = 224\n    image_w = 224\n    onnx_path = \"resnet34.onnx\"\n    ir_path = \"ir_output/resnet34.xml\"\n\n    image = np.random.randn(image_h, image_w, 3)\n    normalized_image = normalize(image)\n\n    # Convert the resized images to network input shape\n    # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n    input_image = np.expand_dims(np.transpose(image, (2, 0, 1)), 0)\n    normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)\n\n    onnx_res = onnx_inference(onnx_path, normalized_input_image)\n    ir_res = ir_inference(ir_path, input_image)\n    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with 
OpenvinoRuntime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/convert_pytorch2onnx.py",
    "content": "import torch\nimport torch.onnx\nimport onnx\nimport onnxruntime\nimport numpy as np\nfrom torchvision.models import resnet34\n\ndevice = torch.device(\"cpu\")\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef main():\n    weights_path = \"resNet34(flower).pth\"\n    onnx_file_name = \"resnet34.onnx\"\n    batch_size = 1\n    img_h = 224\n    img_w = 224\n    img_channel = 3\n\n    # create model and load pretrain weights\n    model = resnet34(pretrained=False, num_classes=5)\n    model.load_state_dict(torch.load(weights_path, map_location='cpu'))\n\n    model.eval()\n    # input to the model\n    # [batch, channel, height, width]\n    x = torch.rand(batch_size, img_channel, img_h, img_w, requires_grad=True)\n    torch_out = model(x)\n\n    # export the model\n    torch.onnx.export(model,             # model being run\n                      x,                 # model input (or a tuple for multiple inputs)\n                      onnx_file_name,    # where to save the model (can be a file or file-like object)\n                      verbose=False)\n\n    # check onnx model\n    onnx_model = onnx.load(onnx_file_name)\n    onnx.checker.check_model(onnx_model)\n\n    ort_session = onnxruntime.InferenceSession(onnx_file_name)\n\n    # compute ONNX Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}\n    ort_outs = ort_session.run(None, ort_inputs)\n\n    # compare ONNX Runtime and Pytorch results\n    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.\n    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with ONNXRuntime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/model.py",
    "content": "from typing import Callable, List, Optional\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom functools import partial\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNActivation(nn.Sequential):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None):\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.ReLU6\n        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,\n                                                         out_channels=out_planes,\n                                                         kernel_size=kernel_size,\n                                                         stride=stride,\n                                                         padding=padding,\n                                                         groups=groups,\n                                                         bias=False),\n                                               norm_layer(out_planes),\n                          
                     activation_layer(inplace=True))\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self, input_c: int, squeeze_factor: int = 4):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)\n        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)\n        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))\n        scale = self.fc1(scale)\n        scale = F.relu(scale, inplace=True)\n        scale = self.fc2(scale)\n        scale = F.hardsigmoid(scale, inplace=True)\n        return scale * x\n\n\nclass InvertedResidualConfig:\n    def __init__(self,\n                 input_c: int,\n                 kernel: int,\n                 expanded_c: int,\n                 out_c: int,\n                 use_se: bool,\n                 activation: str,\n                 stride: int,\n                 width_multi: float):\n        self.input_c = self.adjust_channels(input_c, width_multi)\n        self.kernel = kernel\n        self.expanded_c = self.adjust_channels(expanded_c, width_multi)\n        self.out_c = self.adjust_channels(out_c, width_multi)\n        self.use_se = use_se\n        self.use_hs = activation == \"HS\"  # whether using h-swish activation\n        self.stride = stride\n\n    @staticmethod\n    def adjust_channels(channels: int, width_multi: float):\n        return _make_divisible(channels * width_multi, 8)\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self,\n                 cnf: InvertedResidualConfig,\n                 norm_layer: Callable[..., nn.Module]):\n        super(InvertedResidual, self).__init__()\n\n        if cnf.stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)\n\n        layers: List[nn.Module] = []\n        activation_layer = nn.Hardswish if 
cnf.use_hs else nn.ReLU\n\n        # expand\n        if cnf.expanded_c != cnf.input_c:\n            layers.append(ConvBNActivation(cnf.input_c,\n                                           cnf.expanded_c,\n                                           kernel_size=1,\n                                           norm_layer=norm_layer,\n                                           activation_layer=activation_layer))\n\n        # depthwise\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.expanded_c,\n                                       kernel_size=cnf.kernel,\n                                       stride=cnf.stride,\n                                       groups=cnf.expanded_c,\n                                       norm_layer=norm_layer,\n                                       activation_layer=activation_layer))\n\n        if cnf.use_se:\n            layers.append(SqueezeExcitation(cnf.expanded_c))\n\n        # project\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.out_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Identity))\n\n        self.block = nn.Sequential(*layers)\n        self.out_channels = cnf.out_c\n        self.is_strided = cnf.stride > 1\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.block(x)\n        if self.use_res_connect:\n            result += x\n\n        return result\n\n\nclass MobileNetV3(nn.Module):\n    def __init__(self,\n                 inverted_residual_setting: List[InvertedResidualConfig],\n                 last_channel: int,\n                 num_classes: int = 1000,\n                 block: Optional[Callable[..., nn.Module]] = None,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None):\n        super(MobileNetV3, self).__init__()\n\n        if not 
inverted_residual_setting:\n            raise ValueError(\"The inverted_residual_setting should not be empty.\")\n        elif not (isinstance(inverted_residual_setting, List) and\n                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):\n            raise TypeError(\"The inverted_residual_setting should be List[InvertedResidualConfig]\")\n\n        if block is None:\n            block = InvertedResidual\n\n        if norm_layer is None:\n            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)\n\n        layers: List[nn.Module] = []\n\n        # building first layer\n        firstconv_output_c = inverted_residual_setting[0].input_c\n        layers.append(ConvBNActivation(3,\n                                       firstconv_output_c,\n                                       kernel_size=3,\n                                       stride=2,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        # building inverted residual blocks\n        for cnf in inverted_residual_setting:\n            layers.append(block(cnf, norm_layer))\n\n        # building last several layers\n        lastconv_input_c = inverted_residual_setting[-1].out_c\n        lastconv_output_c = 6 * lastconv_input_c\n        layers.append(ConvBNActivation(lastconv_input_c,\n                                       lastconv_output_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        self.features = nn.Sequential(*layers)\n        self.avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),\n                                        nn.Hardswish(inplace=True),\n                                        nn.Dropout(p=0.2, inplace=True),\n                        
                nn.Linear(last_channel, num_classes))\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef mobilenet_v3_large(num_classes: int = 1000,\n                       reduced_tail: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a large MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. 
It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride\n        bneck_conf(16, 3, 16, 16, False, \"RE\", 1),\n        bneck_conf(16, 3, 64, 24, False, \"RE\", 2),  # C1\n        bneck_conf(24, 3, 72, 24, False, \"RE\", 1),\n        bneck_conf(24, 5, 72, 40, True, \"RE\", 2),  # C2\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1),\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1),\n        bneck_conf(40, 3, 240, 80, False, \"HS\", 2),  # C3\n        bneck_conf(80, 3, 200, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 480, 112, True, \"HS\", 1),\n        bneck_conf(112, 3, 672, 112, True, \"HS\", 1),\n        bneck_conf(112, 5, 672, 160 // reduce_divider, True, \"HS\", 2),  # C4\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1),\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1),\n    ]\n    last_channel = adjust_channels(1280 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n\n\ndef mobilenet_v3_small(num_classes: int = 1000,\n                       reduced_tail: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a small MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    
https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride\n        bneck_conf(16, 3, 16, 16, True, \"RE\", 2),  # C1\n        bneck_conf(16, 3, 72, 24, False, \"RE\", 2),  # C2\n        bneck_conf(24, 3, 88, 24, False, \"RE\", 1),\n        bneck_conf(24, 5, 96, 40, True, \"HS\", 2),  # C3\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1),\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1),\n        bneck_conf(40, 5, 120, 48, True, \"HS\", 1),\n        bneck_conf(48, 5, 144, 48, True, \"HS\", 1),\n        bneck_conf(48, 5, 288, 96 // reduce_divider, True, \"HS\", 2),  # C4\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1),\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1)\n    ]\n    last_channel = adjust_channels(1024 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/quantization_int8.py",
    "content": "from addict import Dict\nfrom compression.engines.ie_engine import IEEngine\nfrom compression.graph import load_model, save_model\nfrom compression.graph.model_utils import compress_model_weights\nfrom compression.pipeline.initializer import create_pipeline\nfrom utils import MyDataLoader, Accuracy, read_split_data\n\n\ndef main():\n    data_path = \"/data/flower_photos\"\n    ir_model_xml = \"ir_output/resnet34.xml\"\n    ir_model_bin = \"ir_output/resnet34.bin\"\n    save_dir = \"quant_ir_output\"\n    model_name = \"quantized_resnet34\"\n    img_w = 224\n    img_h = 224\n\n    model_config = Dict({\n        'model_name': 'resnet34',\n        'model': ir_model_xml,\n        'weights': ir_model_bin\n    })\n    engine_config = Dict({\n        'device': 'CPU',\n        'stat_requests_number': 2,\n        'eval_requests_number': 2\n    })\n    dataset_config = {\n        'data_source': data_path\n    }\n    algorithms = [\n        {\n            'name': 'DefaultQuantization',\n            'params': {\n                'target_device': 'CPU',\n                'preset': 'performance',\n                'stat_subset_size': 300\n            }\n        }\n    ]\n\n    # Steps 1-7: Model optimization\n    # Step 1: Load the model.\n    model = load_model(model_config)\n\n    # Step 2: Initialize the data loader.\n    _, _, val_images_path, val_images_label = read_split_data(data_path, val_rate=0.2)\n    data_loader = MyDataLoader(dataset_config, val_images_path, val_images_label, img_w, img_h)\n\n    # Step 3 (Optional. 
Required for AccuracyAwareQuantization): Initialize the metric.\n    metric = Accuracy(top_k=1)\n\n    # Step 4: Initialize the engine for metric calculation and statistics collection.\n    engine = IEEngine(engine_config, data_loader, metric)\n\n    # Step 5: Create a pipeline of compression algorithms.\n    pipeline = create_pipeline(algorithms, engine)\n\n    # Step 6: Execute the pipeline.\n    compressed_model = pipeline.run(model)\n\n    # Step 7 (Optional): Compress model weights quantized precision\n    #                    in order to reduce the size of final .bin file.\n    compress_model_weights(compressed_model)\n\n    # Step 8: Save the compressed model to the desired path.\n    compressed_model_paths = save_model(model=compressed_model,\n                                        save_path=save_dir,\n                                        model_name=model_name)\n\n    # Step 9: Compare accuracy of the original and quantized models.\n    metric_results = pipeline.evaluate(model)\n    if metric_results:\n        for name, value in metric_results.items():\n            print(f\"Accuracy of the original model: {name}: {value}\")\n\n    metric_results = pipeline.evaluate(compressed_model)\n    if metric_results:\n        for name, value in metric_results.items():\n            print(f\"Accuracy of the optimized model: {name}: {value}\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/requirements.txt",
    "content": "torch==1.11.0\ntorchvision==0.12.0\nonnx==1.13.0\nonnxruntime==1.8.0\nprotobuf==3.19.5\nopenvino-dev==2022.1.0\nmatplotlib"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_resnet34/utils.py",
    "content": "import os\nimport json\nimport random\n\nfrom PIL import Image\nimport numpy as np\nfrom compression.api import DataLoader, Metric\nfrom torchvision.transforms import transforms\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\n# Custom implementation of classification accuracy metric.\nclass Accuracy(Metric):\n    # Required methods\n    def __init__(self, top_k=1):\n        super().__init__()\n        self._top_k = top_k\n        self._name = 'accuracy@top{}'.format(self._top_k)\n        self._matches = []\n\n    @property\n    def value(self):\n        \"\"\" Returns accuracy metric value for the last model output. \"\"\"\n        return {self._name: self._matches[-1]}\n\n    @property\n    def avg_value(self):\n        \"\"\" Returns accuracy metric value for all model outputs. \"\"\"\n        return {self._name: np.ravel(self._matches).mean()}\n\n    def update(self, output, target):\n        \"\"\" Updates prediction matches.\n        :param output: model output\n        :param target: annotations\n        \"\"\"\n        if len(output) > 1:\n            raise Exception('The accuracy metric cannot be calculated '\n                            'for a model with multiple outputs')\n        if isinstance(target, dict):\n            target = list(target.values())\n        predictions = np.argsort(output[0], axis=1)[:, -self._top_k:]\n        match = [float(t in predictions[i]) for i, t in enumerate(target)]\n\n        self._matches.append(match)\n\n    def reset(self):\n        \"\"\" Resets collected matches \"\"\"\n        self._matches = []\n\n    def get_attributes(self):\n        \"\"\"\n        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.\n        Required attributes: 'direction': 'higher-better' or 'higher-worse'\n                             'type': metric type\n        \"\"\"\n        return {self._name: {'direction': 'higher-better',\n                             'type': 
'accuracy'}}\n\n\nclass MyDataLoader(DataLoader):\n    def __init__(self, cfg, images_path: list, images_label: list, img_w: int = 224, img_h: int = 224):\n        super().__init__(cfg)\n        self.images_path = images_path\n        self.images_label = images_label\n        self.image_w = img_w\n        self.image_h = img_h\n        self.transforms = transforms.Compose([\n            transforms.Resize(min(img_h, img_w)),\n            transforms.CenterCrop((img_h, img_w))\n        ])\n\n    def __len__(self):\n        return len(self.images_label)\n\n    def __getitem__(self, index):\n        \"\"\"\n        Return one sample of index, label and picture.\n        :param index: index of the taken sample.\n        \"\"\"\n        if index >= len(self):\n            raise IndexError\n\n        img = Image.open(self.images_path[index])\n        img = self.transforms(img)\n\n        # Convert the resized images to network input shape\n        # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n        img = np.expand_dims(np.transpose(np.array(img), (2, 0, 1)), 0)\n\n        return (index, self.images_label[index]), img\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/README.md",
    "content": "OpenVINO量化YOLOv5\n\n1. 按照`requirements.txt`配置环境\n2. 将YOLOv5转为ONNX\nYOLOv5官方有提供导出ONNX以及OpenVINO的方法，但我这里仅导出成ONNX，这里以YOLOv5s为例\n```\npython export.py --weights yolov5s.pt --include onnx\n```\n\n3. ONNX转换为IR\n使用OpenVINO的`mo`工具将ONNX转为OpenVINO的IR格式\n```\nmo  --input_model yolov5s.onnx \\\n    --input_shape \"[1,3,640,640]\" \\\n    --scale 255 \\\n    --data_type FP32 \\\n    --output_dir ir_output\n```\n\n4. 量化模型\n使用`quantization_int8.py`进行模型的量化，量化过程中需要使用到COCO2017数据集，需要将`data_path`指向coco2017目录\n```\n├── coco2017: 数据集根目录\n     ├── train2017: 所有训练图像文件夹(118287张)\n     ├── val2017: 所有验证图像文件夹(5000张)\n     └── annotations: 对应标注文件夹\n              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件\n              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件\n              ├── captions_train2017.json: 对应图像描述的训练集标注文件\n              ├── captions_val2017.json: 对应图像描述的验证集标注文件\n              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件\n              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件夹\n```\n\n5. benchmark\n直接利用`benchmark_app`工具测试量化前后的`Throughput`，这里以`CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz`设备为例\n```\nbenchmark_app -m ir_output/yolov5s.xml -d CPU -api sync\n```\noutput：\n```\nLatency:\n    Median:     59.56 ms\n    AVG:        63.30 ms\n    MIN:        57.88 ms\n    MAX:        99.89 ms\nThroughput: 16.79 FPS\n```\n\n```\nbenchmark_app -m quant_ir_output/quantized_yolov5s.xml -d CPU -api sync\n```\noutput:\n```\nLatency:\n    Median:     42.97 ms\n    AVG:        46.56 ms\n    MIN:        41.18 ms\n    MAX:        95.75 ms\nThroughput: 23.27 FPS\n```"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/compare_fps.py",
    "content": "import time\nimport numpy as np\nimport torch\nimport onnxruntime\nimport matplotlib.pyplot as plt\nfrom openvino.runtime import Core\n\n\ndef normalize(image: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Normalize the image to the given mean and standard deviation\n    \"\"\"\n    image = image.astype(np.float32)\n    image /= 255.0\n    return image\n\n\ndef onnx_inference(onnx_path: str, image: np.ndarray, num_images: int = 20):\n    # load onnx model\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n\n    # compute onnx Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: image}\n\n    start = time.perf_counter()\n    for _ in range(num_images):\n        ort_session.run(None, ort_inputs)\n    end = time.perf_counter()\n    time_onnx = end - start\n    print(\n        f\"ONNX model in Inference Engine/CPU: {time_onnx / num_images:.3f} \"\n        f\"seconds per image, FPS: {num_images / time_onnx:.2f}\"\n    )\n\n    return num_images / time_onnx\n\n\ndef ir_inference(ir_path: str, image: np.ndarray, num_images: int = 20):\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_path)\n    compiled_model_ir = ie.compile_model(model=model_ir, device_name=\"CPU\")\n\n    # Get input and output layers\n    input_layer_ir = next(iter(compiled_model_ir.inputs))\n    output_layer_ir = next(iter(compiled_model_ir.outputs))\n\n    start = time.perf_counter()\n    request_ir = compiled_model_ir.create_infer_request()\n    for _ in range(num_images):\n        request_ir.infer(inputs={input_layer_ir.any_name: image})\n    end = time.perf_counter()\n    time_ir = end - start\n    print(\n        f\"IR model in Inference Engine/CPU: {time_ir / num_images:.3f} \"\n        f\"seconds per image, FPS: {num_images / time_ir:.2f}\"\n    )\n\n    return num_images / time_ir\n\n\ndef pytorch_inference(image: np.ndarray, num_images: int = 20):\n    image = torch.as_tensor(image, 
dtype=torch.float32)\n\n    model = torch.hub.load('ultralytics/yolov5', 'yolov5s')\n    model.eval()\n\n    with torch.no_grad():\n        start = time.perf_counter()\n        for _ in range(num_images):\n            model(image)\n        end = time.perf_counter()\n        time_torch = end - start\n\n    print(\n        f\"PyTorch model on CPU: {time_torch / num_images:.3f} seconds per image, \"\n        f\"FPS: {num_images / time_torch:.2f}\"\n    )\n\n    return num_images / time_torch\n\n\ndef plot_fps(v: dict):\n    x = list(v.keys())\n    y = list(v.values())\n\n    plt.bar(range(len(x)), y, align='center')\n    plt.xticks(range(len(x)), x)\n    for i, v in enumerate(y):\n        plt.text(x=i, y=v+0.5, s=f\"{v:.2f}\", ha='center')\n    plt.xlabel('model format')\n    plt.ylabel('fps')\n    plt.title('FPS comparison')\n    plt.show()\n    plt.savefig('fps_vs.jpg')\n\n\ndef main():\n    image_h = 640\n    image_w = 640\n    onnx_path = \"yolov5s.onnx\"\n    ir_path = \"ir_output/yolov5s.xml\"\n\n    image = np.random.randn(image_h, image_w, 3)\n    normalized_image = normalize(image)\n\n    # Convert the resized images to network input shape\n    # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n    input_image = np.expand_dims(np.transpose(image, (2, 0, 1)), 0)\n    normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)\n\n    onnx_fps = onnx_inference(onnx_path, normalized_input_image, num_images=100)\n    ir_fps = ir_inference(ir_path, input_image, num_images=100)\n    pytorch_fps = pytorch_inference(normalized_input_image, num_images=100)\n    plot_fps({\"pytorch\": round(pytorch_fps, 2),\n              \"onnx\": round(onnx_fps, 2),\n              \"ir\": round(ir_fps, 2)})\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/compare_onnx_and_ir.py",
    "content": "import numpy as np\nimport onnxruntime\nfrom openvino.runtime import Core\n\n\ndef normalize(image: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Cast the image to float32 and divide it by 255 (no mean/std normalization is applied)\n    \"\"\"\n    image = image.astype(np.float32)\n    image /= 255.0\n    return image\n\n\ndef onnx_inference(onnx_path: str, image: np.ndarray):\n    # load onnx model\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n\n    # compute onnx Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: image}\n    res_onnx = ort_session.run(None, ort_inputs)[0]\n    return res_onnx\n\n\ndef ir_inference(ir_path: str, image: np.ndarray):\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_path)\n    compiled_model_ir = ie.compile_model(model=model_ir, device_name=\"CPU\")\n\n    # Get input and output layers\n    input_layer_ir = next(iter(compiled_model_ir.inputs))\n    output_layer_ir = next(iter(compiled_model_ir.outputs))\n\n    # Run inference on the input image\n    res_ir = compiled_model_ir([image])[output_layer_ir]\n    return res_ir\n\n\ndef main():\n    image_h = 640\n    image_w = 640\n    onnx_path = \"yolov5s.onnx\"\n    ir_path = \"ir_output/yolov5s.xml\"\n\n    image = np.random.randn(image_h, image_w, 3)\n    normalized_image = normalize(image)\n\n    # Convert the resized images to network input shape\n    # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n    input_image = np.expand_dims(np.transpose(image, (2, 0, 1)), 0)\n    normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)\n\n    onnx_res = onnx_inference(onnx_path, normalized_input_image)\n    ir_res = ir_inference(ir_path, input_image)\n    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with OpenvinoRuntime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/evaluation.py",
    "content": "from tqdm import tqdm\nimport torch\nfrom openvino.runtime import Core\nfrom utils import MyDataLoader, EvalCOCOMetric, non_max_suppression\n\n\ndef main():\n    data_path = \"/data/coco2017\"\n    ir_model_xml = \"quant_ir_output/quantized_yolov5s.xml\"\n    img_size = (640, 640)  # h, w\n\n    data_loader = MyDataLoader(data_path, \"val\", size=img_size)\n    coco80_to_91 = data_loader.coco_id80_to_id91\n    metrics = EvalCOCOMetric(coco=data_loader.coco, classes_mapping=coco80_to_91)\n\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_model_xml)\n    compiled_model = ie.compile_model(model=model_ir, device_name=\"CPU\")\n    inputs_names = compiled_model.inputs\n    outputs_names = compiled_model.outputs\n\n    # inference\n    request = compiled_model.create_infer_request()\n    for i in tqdm(range(len(data_loader))):\n        data = data_loader[i]\n        ann, img, info = data\n        ann = ann + (info,)\n\n        request.infer(inputs={inputs_names[0]: img})\n        result = request.get_output_tensor(outputs_names[0].index).data\n\n        # post-process\n        result = non_max_suppression(torch.Tensor(result), conf_thres=0.001, iou_thres=0.6, multi_label=True)[0]\n        boxes = result[:, :4].numpy()\n        scores = result[:, 4].numpy()\n        cls = result[:, 5].numpy().astype(int)\n        metrics.update(ann, [boxes, cls, scores])\n\n    metrics.evaluate()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/predict.py",
    "content": "import cv2\nimport numpy as np\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nfrom openvino.runtime import Core\nfrom utils import letterbox, scale_coords, non_max_suppression, coco80_names\nfrom draw_box_utils import draw_objs\n\n\ndef main():\n    img_path = \"test.jpg\"\n    ir_model_xml = \"ir_output/yolov5s.xml\"\n    img_size = (640, 640)  # h, w\n\n    origin_img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)\n    reshape_img, ratio, pad = letterbox(origin_img, img_size, auto=False)\n    input_img = np.expand_dims(np.transpose(reshape_img, [2, 0, 1]), 0).astype(np.float32)\n\n    # Load the network in Inference Engine\n    ie = Core()\n    model_ir = ie.read_model(model=ir_model_xml)\n    compiled_model = ie.compile_model(model=model_ir, device_name=\"CPU\")\n    inputs_names = compiled_model.inputs\n    outputs_names = compiled_model.outputs\n\n    # inference\n    request = compiled_model.create_infer_request()\n    request.infer(inputs={inputs_names[0]: input_img})\n    result = request.get_output_tensor(outputs_names[0].index).data\n\n    # post-process\n    result = non_max_suppression(torch.Tensor(result))[0]\n    boxes = result[:, :4].numpy()\n    scores = result[:, 4].numpy()\n    cls = result[:, 5].numpy().astype(int)\n    boxes = scale_coords(reshape_img.shape, boxes, origin_img.shape, (ratio, pad))\n\n    draw_img = draw_objs(Image.fromarray(origin_img),\n                         boxes,\n                         cls,\n                         scores,\n                         category_index=dict([(str(i), v) for i, v in enumerate(coco80_names)]))\n    plt.imshow(draw_img)\n    plt.show()\n    draw_img.save(\"predict.jpg\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/quantization_int8.py",
    "content": "import time\nfrom addict import Dict\nfrom compression.engines.ie_engine import IEEngine\nfrom compression.graph import load_model, save_model\nfrom compression.graph.model_utils import compress_model_weights\nfrom compression.pipeline.initializer import create_pipeline\nfrom yaspin import yaspin\nfrom utils import MyDataLoader, MAPMetric\n\n\ndef main():\n    data_path = \"/data/coco2017\"\n    ir_model_xml = \"ir_output/yolov5s.xml\"\n    ir_model_bin = \"ir_output/yolov5s.bin\"\n    save_dir = \"quant_ir_output\"\n    model_name = \"quantized_yolov5s\"\n    img_w = 640\n    img_h = 640\n\n    model_config = Dict({\n        'model_name': 'yolov5s',\n        'model': ir_model_xml,\n        'weights': ir_model_bin,\n        'inputs': 'images',\n        'outputs': 'output'\n    })\n    engine_config = Dict({'device': 'CPU'})\n\n    algorithms = [\n        {\n            'name': 'DefaultQuantization',\n            'params': {\n                'target_device': 'CPU',\n                'preset': 'performance',\n                'stat_subset_size': 300\n            }\n        }\n    ]\n\n    # Step 1: Load the model.\n    model = load_model(model_config)\n\n    # Step 2: Initialize the data loader.\n    data_loader = MyDataLoader(data_path, \"val\", (img_h, img_w))\n\n    # Step 3: initialize the metric\n    # For DefaultQuantization, specifying a metric is optional: metric can be set to None\n    metric = MAPMetric(map_value=\"map\")\n\n    # Step 4: Initialize the engine for metric calculation and statistics collection.\n    engine = IEEngine(config=engine_config, data_loader=data_loader, metric=metric)\n\n    # Step 5: Create a pipeline of compression algorithms.\n    pipeline = create_pipeline(algorithms, engine)\n\n    # Step 6: Execute the pipeline to quantize the model\n    algorithm_name = pipeline.algo_seq[0].name\n    with yaspin(\n            text=f\"Executing POT pipeline on {model_config['model']} with {algorithm_name}\"\n    ) as sp:\n        
start_time = time.perf_counter()\n        compressed_model = pipeline.run(model)\n        end_time = time.perf_counter()\n        sp.ok(\"✔\")\n    print(f\"Quantization finished in {end_time - start_time:.2f} seconds\")\n\n    # Step 7 (Optional): Compress model weights to quantized precision\n    #                    in order to reduce the size of the final .bin file\n    compress_model_weights(compressed_model)\n\n    # Step 8: Save the compressed model to the desired path.\n    # Set save_path to the directory where the compressed model should be stored\n    compressed_model_paths = save_model(\n        model=compressed_model,\n        save_path=save_dir,\n        model_name=model_name,\n    )\n\n    compressed_model_path = compressed_model_paths[0][\"model\"]\n    print(\"The quantized model is stored at\", compressed_model_path)\n\n    # Compute the mAP on the quantized model and compare with the mAP on the FP16 IR model.\n    ir_model = load_model(model_config=model_config)\n    evaluation_pipeline = create_pipeline(algo_config=dict(), engine=engine)\n\n    with yaspin(text=\"Evaluating original IR model\") as sp:\n        original_metric = evaluation_pipeline.evaluate(ir_model)\n\n    if original_metric:\n        for key, value in original_metric.items():\n            print(f\"The {key} score of the original model is {value:.5f}\")\n\n    with yaspin(text=\"Evaluating quantized IR model\") as sp:\n        quantized_metric = pipeline.evaluate(compressed_model)\n\n    if quantized_metric:\n        for key, value in quantized_metric.items():\n            print(f\"The {key} score of the quantized INT8 model is {value:.5f}\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/requirements.txt",
    "content": "torch==1.13.1\ntorchvision==0.12.0\nonnx==1.13.0\nonnxruntime==1.8.0\nprotobuf==3.19.5\nopenvino-dev==2022.1.0\nmatplotlib\ntorchmetrics==0.9.1"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_openvino/convert_yolov5/utils.py",
    "content": "import os\nimport time\nimport json\nimport copy\n\nimport cv2\nimport numpy as np\nimport torch\nfrom torchmetrics.detection.mean_ap import MeanAveragePrecision\nimport torchvision\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\nfrom compression.api import DataLoader, Metric\n\n\ncoco80_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',\n                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n                'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n\n\ndef box_iou(box1, box2):\n    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n    Arguments:\n        box1 (Tensor[N, 4])\n        box2 (Tensor[M, 4])\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n\n    def box_area(box):\n        # box = 4xn\n        return (box[2] - box[0]) * (box[3] - box[1])\n\n    area1 = box_area(box1.T)\n    area2 = box_area(box2.T)\n\n    # 
inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)\n    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)\n    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)\n\n\ndef xywh2xyxy(x):\n    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right\n    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)\n    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x\n    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y\n    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x\n    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y\n    return y\n\n\ndef non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,\n                        labels=(), max_det=300):\n    \"\"\"Runs Non-Maximum Suppression (NMS) on inference results\n\n    Returns:\n         list of detections, on (n,6) tensor per image [xyxy, conf, cls]\n    \"\"\"\n\n    nc = prediction.shape[2] - 5  # number of classes\n    xc = prediction[..., 4] > conf_thres  # candidates\n\n    # Checks\n    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'\n    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'\n\n    # Settings\n    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height\n    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()\n    time_limit = 10.0  # seconds to quit after\n    redundant = True  # require redundant detections\n    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)\n    merge = False  # use merge-NMS\n\n    t = time.time()\n    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]\n    for xi, x in enumerate(prediction):  # image index, image inference\n        # Apply constraints\n        x[((x[..., 2:4] < min_wh) | 
(x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height\n        x = x[xc[xi]]  # confidence\n\n        # Cat apriori labels if autolabelling\n        if labels and len(labels[xi]):\n            lb = labels[xi]\n            v = torch.zeros((len(lb), nc + 5), device=x.device)\n            v[:, :4] = lb[:, 1:5]  # box\n            v[:, 4] = 1.0  # conf\n            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls\n            x = torch.cat((x, v), 0)\n\n        # If none remain process next image\n        if not x.shape[0]:\n            continue\n\n        # Compute conf\n        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf\n\n        # Box (center x, center y, width, height) to (x1, y1, x2, y2)\n        box = xywh2xyxy(x[:, :4])\n\n        # Detections matrix nx6 (xyxy, conf, cls)\n        if multi_label:\n            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T\n            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)\n        else:  # best class only\n            conf, j = x[:, 5:].max(1, keepdim=True)\n            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]\n\n        # Filter by class\n        if classes is not None:\n            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]\n\n        # Apply finite constraint\n        # if not torch.isfinite(x).all():\n        #     x = x[torch.isfinite(x).all(1)]\n\n        # Check shape\n        n = x.shape[0]  # number of boxes\n        if not n:  # no boxes\n            continue\n        elif n > max_nms:  # excess boxes\n            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence\n\n        # Batched NMS\n        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes\n        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores\n        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS\n        if i.shape[0] > max_det:  # limit detections\n            i = i[:max_det]\n        if merge 
and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)\n            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)\n            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix\n            weights = iou * scores[None]  # box weights\n            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes\n            if redundant:\n                i = i[iou.sum(1) > 1]  # require redundancy\n\n        output[xi] = x[i]\n        if (time.time() - t) > time_limit:\n            print(f'WARNING: NMS time limit {time_limit}s exceeded')\n            break  # time limit exceeded\n\n    return output\n\n\nclass MAPMetric(Metric):\n    def __init__(self, map_value=\"map\", conf_thres=0.001, iou_thres=0.6):\n        \"\"\"\n        Mean Average Precision Metric. Wraps torchmetrics implementation, see\n        https://torchmetrics.readthedocs.io/en/stable/detection/mean_average_precision.html\n\n        :map_value: specific metric to return. 
Default: \"map\"\n                    Change `to one of the values in the list below to return a different value\n                    ['mar_1', 'mar_10', 'mar_100', 'mar_small', 'mar_medium', 'mar_large',\n                     'map', 'map_50', 'map_75', 'map_small', 'map_medium', 'map_large']\n                    See torchmetrics documentation for more details.\n        \"\"\"\n\n        self._name = map_value\n        self.metric = MeanAveragePrecision(box_format=\"xyxy\")\n        self.conf_thres = conf_thres\n        self.iou_thres = iou_thres\n        super().__init__()\n\n    @property\n    def value(self):\n        \"\"\"\n        Returns metric value for the last model output.\n        Possible format: {metric_name: [metric_values_per_image]}\n        \"\"\"\n        return {self._name: [0]}\n\n    @property\n    def avg_value(self):\n        \"\"\"\n        Returns average metric value for all model outputs.\n        Possible format: {metric_name: metric_value}\n        \"\"\"\n        return {self._name: self.metric.compute()[self._name].item()}\n\n    def update(self, output, target):\n        \"\"\"\n        Convert network output and labels to the format that torchmetrics' MAP\n        implementation expects, and call `metric.update()`.\n\n        :param output: model output\n        :param target: annotations for model output\n        \"\"\"\n        targetboxes = []\n        targetlabels = []\n        predboxes = []\n        predlabels = []\n        scores = []\n\n        for single_target in target[0]:\n            txmin, tymin, txmax, tymax = single_target[\"bbox\"]\n            category = single_target[\"category_id\"]\n\n            targetbox = [round(txmin), round(tymin), round(txmax), round(tymax)]\n            targetboxes.append(targetbox)\n            targetlabels.append(category)\n\n        output = torch.Tensor(output[0]).float()\n        output = non_max_suppression(output, conf_thres=self.conf_thres, iou_thres=self.iou_thres, 
multi_label=True)\n        for single_output in output:\n            for pred in single_output.numpy():\n                xmin, ymin, xmax, ymax, conf, label = pred\n\n                predbox = [round(xmin), round(ymin), round(xmax), round(ymax)]\n                predboxes.append(predbox)\n                predlabels.append(label)\n                scores.append(conf)\n\n        preds = [\n            dict(\n                boxes=torch.Tensor(predboxes).float(),\n                labels=torch.Tensor(predlabels).short(),\n                scores=torch.Tensor(scores),\n            )\n        ]\n        targets = [\n            dict(\n                boxes=torch.Tensor(targetboxes).float(),\n                labels=torch.Tensor(targetlabels).short(),\n            )\n        ]\n        self.metric.update(preds, targets)\n\n    def reset(self):\n        \"\"\"\n        Resets metric\n        \"\"\"\n        self.metric.reset()\n\n    def get_attributes(self):\n        \"\"\"\n        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.\n        Required attributes: 'direction': 'higher-better' or 'higher-worse'\n                             'type': metric type\n        \"\"\"\n        return {self._name: {\"direction\": \"higher-better\", \"type\": \"mAP\"}}\n\n\ndef _coco_remove_images_without_annotations(dataset, ids):\n    \"\"\"\n    删除coco数据集中没有目标，或者目标面积非常小的数据\n    refer to:\n    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py\n    :param dataset:\n    :param cat_list:\n    :return:\n    \"\"\"\n    def _has_only_empty_bbox(anno):\n        return all(any(o <= 1 for o in obj[\"bbox\"][2:]) for obj in anno)\n\n    def _has_valid_annotation(anno):\n        # if it's empty, there is no annotation\n        if len(anno) == 0:\n            return False\n        # if all boxes have close to zero area, there is no annotation\n        if _has_only_empty_bbox(anno):\n            return False\n\n        return True\n\n    
valid_ids = []\n    for ds_idx, img_id in enumerate(ids):\n        ann_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=None)\n        anno = dataset.loadAnns(ann_ids)\n\n        if _has_valid_annotation(anno):\n            valid_ids.append(img_id)\n\n    return valid_ids\n\n\ndef scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):\n    # Rescale coords (xyxy) from img1_shape to img0_shape\n    if ratio_pad is None:  # calculate from img0_shape\n        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new\n        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding\n    else:\n        assert ratio_pad[0][0] == ratio_pad[0][1]\n        gain = ratio_pad[0][0]\n        pad = ratio_pad[1]\n\n    coords[:, [0, 2]] -= pad[0]  # x padding\n    coords[:, [1, 3]] -= pad[1]  # y padding\n    coords[:, :4] /= gain\n    clip_coords(coords, img0_shape)\n    return coords\n\n\ndef clip_coords(boxes, shape):\n    # Clip bounding xyxy bounding boxes to image shape (height, width)\n    if isinstance(boxes, torch.Tensor):  # faster individually\n        boxes[:, 0].clamp_(0, shape[1])  # x1\n        boxes[:, 1].clamp_(0, shape[0])  # y1\n        boxes[:, 2].clamp_(0, shape[1])  # x2\n        boxes[:, 3].clamp_(0, shape[0])  # y2\n    else:  # np.array (faster grouped)\n        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2\n        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2\n\n\ndef letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):\n    # Resize and pad image while meeting stride-multiple constraints\n    shape = im.shape[:2]  # current shape [height, width]\n    if isinstance(new_shape, int):\n        new_shape = (new_shape, new_shape)\n\n    # Scale ratio (new / old)\n    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n    if not scaleup:  # only scale down, do not 
scale up (for better val mAP)\n        r = min(r, 1.0)\n\n    # Compute padding\n    ratio = r, r  # width, height ratios\n    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n    if auto:  # minimum rectangle\n        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding\n    elif scaleFill:  # stretch\n        dw, dh = 0.0, 0.0\n        new_unpad = (new_shape[1], new_shape[0])\n        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios\n\n    dw /= 2  # divide padding into 2 sides\n    dh /= 2\n\n    if shape[::-1] != new_unpad:  # resize\n        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)\n    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))\n    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))\n    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n    return im, ratio, (left, top)\n\n\nclass MyDataLoader(DataLoader):\n    \"\"\"`MS Coco Detection <https://cocodataset.org/>`_ Dataset.\n\n    Args:\n        root (string): Root directory where images are downloaded to.\n        dataset (string): \"train\" or \"val.\n        size (tuple): (h, w)\n    \"\"\"\n    def __init__(self, root, dataset=\"train\", size=(640, 640)):\n        assert dataset in [\"train\", \"val\"], 'dataset must be in [\"train\", \"val\"]'\n        anno_file = \"instances_{}2017.json\".format(dataset)\n        assert os.path.exists(root), \"file '{}' does not exist.\".format(root)\n        self.img_root = os.path.join(root, \"{}2017\".format(dataset))\n        assert os.path.exists(self.img_root), \"path '{}' does not exist.\".format(self.img_root)\n        self.anno_path = os.path.join(root, \"annotations\", anno_file)\n        assert os.path.exists(self.anno_path), \"file '{}' does not exist.\".format(self.anno_path)\n\n        self.mode = dataset\n        self.size = 
size\n        self.coco = COCO(self.anno_path)\n\n        self.coco91_id2classes = dict([(v[\"id\"], v[\"name\"]) for k, v in self.coco.cats.items()])\n        coco90_classes2id = dict([(v[\"name\"], v[\"id\"]) for k, v in self.coco.cats.items()])\n\n        self.coco80_classes = coco80_names\n        self.coco_id80_to_id91 = dict([(i, coco90_classes2id[k]) for i, k in enumerate(coco80_names)])\n\n        ids = list(sorted(self.coco.imgs.keys()))\n\n        # 移除没有目标，或者目标面积非常小的数据\n        valid_ids = _coco_remove_images_without_annotations(self.coco, ids)\n        self.ids = valid_ids\n\n    def parse_targets(self,\n                      coco_targets: list,\n                      w: int = None,\n                      h: int = None,\n                      ratio: tuple = None,\n                      pad: tuple = None):\n        assert w > 0\n        assert h > 0\n\n        # 只筛选出单个对象的情况\n        anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]\n\n        boxes = [obj[\"bbox\"] for obj in anno]\n\n        # guard against no boxes via resizing\n        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)\n        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]\n        boxes[:, 2:] += boxes[:, :2]\n        boxes[:, 0::2] = np.clip(boxes[:, 0::2], a_min=0, a_max=w)\n        boxes[:, 1::2] = np.clip(boxes[:, 1::2], a_min=0, a_max=h)\n\n        classes = [self.coco80_classes.index(self.coco91_id2classes[obj[\"category_id\"]])\n                   for obj in anno]\n        classes = np.array(classes, dtype=int)\n\n        # 筛选出合法的目标，即x_max>x_min且y_max>y_min\n        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])\n        boxes = boxes[keep]\n        classes = classes[keep]\n\n        if ratio is not None:\n            # width, height ratios\n            boxes[:, 0::2] *= ratio[0]\n            boxes[:, 1::2] *= ratio[1]\n\n        if pad is not None:\n            # dw, dh padding\n            dw, dh = pad\n            boxes[:, 0::2] += dw\n   
         boxes[:, 1::2] += dh\n\n        target_annotations = []\n        for i in range(boxes.shape[0]):\n            target_annotation = {\n                \"category_id\": int(classes[i]),\n                \"bbox\": boxes[i].tolist()\n            }\n            target_annotations.append(target_annotation)\n\n        return target_annotations\n\n    def __getitem__(self, index):\n        \"\"\"\n        Get an item from the dataset at the specified index.\n        Detection boxes are converted from absolute coordinates to relative coordinates\n        between 0 and 1 by dividing xmin, xmax by image width and ymin, ymax by image height.\n\n        :return: (annotation, input_image, metadata) where annotation is (index, target_annotation)\n                 with target_annotation as a dictionary with keys category_id, image_width, image_height\n                 and bbox, containing the relative bounding box coordinates [xmin, ymin, xmax, ymax]\n                 (with values between 0 and 1) and metadata a dictionary: {\"filename\": path_to_image}\n        \"\"\"\n        coco = self.coco\n        img_id = self.ids[index]\n        ann_ids = coco.getAnnIds(imgIds=img_id)\n        coco_target = coco.loadAnns(ann_ids)\n\n        image_path = coco.loadImgs(img_id)[0]['file_name']\n        img = cv2.imread(os.path.join(self.img_root, image_path))\n\n        origin_h, origin_w, c = img.shape\n        image, ratio, pad = letterbox(img, auto=False, new_shape=self.size)\n        target_annotations = self.parse_targets(coco_target, origin_w, origin_h, ratio, pad)\n\n        item_annotation = (index, target_annotations)\n        input_image = np.expand_dims(image.transpose(2, 0, 1), axis=0).astype(\n            np.float32\n        )\n        return (\n            item_annotation,\n            input_image,\n            {\"filename\": str(image_path),\n             \"origin_shape\": img.shape,\n             \"shape\": image.shape,\n             \"img_id\": img_id,\n             
\"ratio_pad\": [ratio, pad]},\n        )\n\n    def __len__(self):\n        return len(self.ids)\n\n    @staticmethod\n    def collate_fn(x):\n        return x\n\n\nclass EvalCOCOMetric:\n    def __init__(self,\n                 coco: COCO = None,\n                 iou_type: str = \"bbox\",\n                 results_file_name: str = \"predict_results.json\",\n                 classes_mapping: dict = None):\n        self.coco = copy.deepcopy(coco)\n        self.results = []\n        self.classes_mapping = classes_mapping\n        self.coco_evaluator = None\n        assert iou_type in [\"bbox\"]\n        self.iou_type = iou_type\n        self.results_file_name = results_file_name\n\n    def prepare_for_coco_detection(self, ann, output):\n        \"\"\"将预测的结果转换成COCOeval指定的格式，针对目标检测任务\"\"\"\n        # 遍历每张图像的预测结果\n        if len(output[0]) == 0:\n            return\n\n        img_id = ann[2][\"img_id\"]\n        per_image_boxes = output[0]\n        per_image_boxes = scale_coords(img1_shape=ann[2][\"shape\"],\n                                       coords=per_image_boxes,\n                                       img0_shape=ann[2][\"origin_shape\"],\n                                       ratio_pad=ann[2][\"ratio_pad\"])\n        # 对于coco_eval, 需要的每个box的数据格式为[x_min, y_min, w, h]\n        # 而我们预测的box格式是[x_min, y_min, x_max, y_max]，所以需要转下格式\n        per_image_boxes[:, 2:] -= per_image_boxes[:, :2]\n        per_image_classes = output[1].tolist()\n        per_image_scores = output[2].tolist()\n\n        # 遍历每个目标的信息\n        for object_score, object_class, object_box in zip(\n                per_image_scores, per_image_classes, per_image_boxes):\n            object_score = float(object_score)\n            class_idx = int(object_class)\n            if self.classes_mapping is not None:\n                class_idx = self.classes_mapping[class_idx]\n            # We recommend rounding coordinates to the nearest tenth of a pixel\n            # to reduce resulting JSON file size.\n   
         object_box = [round(b, 2) for b in object_box.tolist()]\n\n            res = {\"image_id\": img_id,\n                   \"category_id\": class_idx,\n                   \"bbox\": object_box,\n                   \"score\": round(object_score, 3)}\n            self.results.append(res)\n\n    def update(self, targets, outputs):\n        if self.iou_type == \"bbox\":\n            self.prepare_for_coco_detection(targets, outputs)\n        else:\n            raise KeyError(f\"not support iou_type: {self.iou_type}\")\n\n    def evaluate(self):\n        # write predict results into json file\n        json_str = json.dumps(self.results, indent=4)\n        with open(self.results_file_name, 'w') as json_file:\n            json_file.write(json_str)\n\n        # accumulate predictions from all images\n        coco_true = self.coco\n        coco_pre = coco_true.loadRes(self.results_file_name)\n\n        self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)\n\n        self.coco_evaluator.evaluate()\n        self.coco_evaluator.accumulate()\n        print(f\"IoU metric: {self.iou_type}\")\n        self.coco_evaluator.summarize()\n\n        coco_info = self.coco_evaluator.stats.tolist()  # numpy to list\n        return coco_info\n\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/compare_onnx_and_trt.py",
    "content": "import numpy as np\nimport tensorrt as trt\nimport onnxruntime\nimport pycuda.driver as cuda\nimport pycuda.autoinit\n\n\ndef normalize(image: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Normalize the image to the given mean and standard deviation\n    \"\"\"\n    image = image.astype(np.float32)\n    mean = (0.485, 0.456, 0.406)\n    std = (0.229, 0.224, 0.225)\n    image /= 255.0\n    image -= mean\n    image /= std\n    return image\n\n\ndef onnx_inference(onnx_path: str, image: np.ndarray):\n    # load onnx model\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n\n    # compute onnx Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: image}\n    res_onnx = ort_session.run(None, ort_inputs)[0]\n    return res_onnx\n\n\ndef trt_inference(trt_path: str, image: np.ndarray):\n    # Load the network in Inference Engine\n    trt_logger = trt.Logger(trt.Logger.WARNING)\n    with open(trt_path, \"rb\") as f, trt.Runtime(trt_logger) as runtime:\n        engine = runtime.deserialize_cuda_engine(f.read())\n\n    with engine.create_execution_context() as context:\n        # Set input shape based on image dimensions for inference\n        context.set_binding_shape(engine.get_binding_index(\"input\"), (1, 3, image.shape[-2], image.shape[-1]))\n        # Allocate host and device buffers\n        bindings = []\n        for binding in engine:\n            binding_idx = engine.get_binding_index(binding)\n            size = trt.volume(context.get_binding_shape(binding_idx))\n            dtype = trt.nptype(engine.get_binding_dtype(binding))\n            if engine.binding_is_input(binding):\n                input_buffer = np.ascontiguousarray(image)\n                input_memory = cuda.mem_alloc(image.nbytes)\n                bindings.append(int(input_memory))\n            else:\n                output_buffer = cuda.pagelocked_empty(size, dtype)\n                output_memory = cuda.mem_alloc(output_buffer.nbytes)\n                
bindings.append(int(output_memory))\n\n        stream = cuda.Stream()\n        # Transfer input data to the GPU.\n        cuda.memcpy_htod_async(input_memory, input_buffer, stream)\n        # Run inference\n        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)\n        # Transfer prediction output from the GPU.\n        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)\n        # Synchronize the stream\n        stream.synchronize()\n\n        res_trt = np.reshape(output_buffer, (1, -1))\n\n    return res_trt\n\n\ndef main():\n    image_h = 224\n    image_w = 224\n    onnx_path = \"resnet34.onnx\"\n    trt_path = \"trt_output/resnet34.trt\"\n\n    image = np.random.randn(image_h, image_w, 3)\n    normalized_image = normalize(image)\n\n    # Convert the resized images to network input shape\n    # [h, w, c] -> [c, h, w] -> [1, c, h, w]\n    normalized_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)\n\n    onnx_res = onnx_inference(onnx_path, normalized_image)\n    ir_res = trt_inference(trt_path, normalized_image)\n    np.testing.assert_allclose(onnx_res, ir_res, rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with TensorRT Runtime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/convert_pytorch2onnx.py",
    "content": "import torch\nimport torch.onnx\nimport onnx\nimport onnxruntime\nimport numpy as np\nfrom torchvision.models import resnet34\n\ndevice = torch.device(\"cpu\")\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef main():\n    weights_path = \"resNet34(flower).pth\"\n    onnx_file_name = \"resnet34.onnx\"\n    batch_size = 1\n    img_h = 224\n    img_w = 224\n    img_channel = 3\n\n    # create model and load pretrain weights\n    model = resnet34(pretrained=False, num_classes=5)\n    model.load_state_dict(torch.load(weights_path, map_location='cpu'))\n\n    model.eval()\n    # input to the model\n    # [batch, channel, height, width]\n    x = torch.rand(batch_size, img_channel, img_h, img_w, requires_grad=True)\n    torch_out = model(x)\n\n    # export the model\n    torch.onnx.export(model,             # model being run\n                      x,                 # model input (or a tuple for multiple inputs)\n                      onnx_file_name,    # where to save the model (can be a file or file-like object)\n                      input_names=[\"input\"],\n                      output_names=[\"output\"],\n                      verbose=False)\n\n    # check onnx model\n    onnx_model = onnx.load(onnx_file_name)\n    onnx.checker.check_model(onnx_model)\n\n    ort_session = onnxruntime.InferenceSession(onnx_file_name)\n\n    # compute ONNX Runtime output prediction\n    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}\n    ort_outs = ort_session.run(None, ort_inputs)\n\n    # compare ONNX Runtime and Pytorch results\n    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.\n    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with ONNXRuntime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/quantization.py",
    "content": "\"\"\"\nrefer to:\nhttps://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/userguide.html\n\"\"\"\nimport os\nimport math\nimport argparse\n\nfrom absl import logging\nfrom tqdm import tqdm\nimport torch\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torchvision import transforms\nfrom torchvision.models.resnet import resnet34 as create_model\nfrom pytorch_quantization import nn as quant_nn\nfrom pytorch_quantization import quant_modules, calib\nfrom pytorch_quantization.tensor_quant import QuantDescriptor\n\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\nlogging.set_verbosity(logging.FATAL)\n\n\ndef export_onnx(model, onnx_filename, onnx_bs):\n    model.eval()\n    # We have to shift to pytorch's fake quant ops before exporting the model to ONNX\n    quant_nn.TensorQuantizer.use_fb_fake_quant = True\n    opset_version = 13\n\n    print(f\"Export ONNX file: {onnx_filename}\")\n    dummy_input = torch.randn(onnx_bs, 3, 224, 224).cuda()\n    torch.onnx.export(model,\n                      dummy_input,\n                      onnx_filename,\n                      verbose=False,\n                      opset_version=opset_version,\n                      enable_onnx_checker=False,\n                      input_names=[\"input\"],\n                      output_names=[\"output\"])\n\n\ndef collect_stats(model, data_loader, num_batches):\n    \"\"\"Feed data to the network and collect statistic\"\"\"\n\n    # Enable calibrators\n    for name, module in model.named_modules():\n        if isinstance(module, quant_nn.TensorQuantizer):\n            if module._calibrator is not None:\n                module.disable_quant()\n                module.enable_calib()\n            else:\n                module.disable()\n\n    for i, (images, _) in tqdm(enumerate(data_loader), total=num_batches):\n        model(images.cuda())\n        if i >= num_batches:\n       
     break\n\n    # Disable calibrators\n    for name, module in model.named_modules():\n        if isinstance(module, quant_nn.TensorQuantizer):\n            if module._calibrator is not None:\n                module.enable_quant()\n                module.disable_calib()\n            else:\n                module.enable()\n\n\ndef compute_amax(model, **kwargs):\n    # Load calib result\n    for name, module in model.named_modules():\n        if isinstance(module, quant_nn.TensorQuantizer):\n            if module._calibrator is not None:\n                if isinstance(module._calibrator, calib.MaxCalibrator):\n                    module.load_calib_amax()\n                else:\n                    module.load_calib_amax(**kwargs)\n            print(f\"{name:40}: {module}\")\n    model.cuda()\n\n\ndef main(args):\n    quant_modules.initialize()\n    assert torch.cuda.is_available(), \"only support GPU!\"\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n          
                  transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # ########################## #\n    # Post Training Quantization #\n    # ########################## #\n    # We will use histogram based calibration for activations and the default max calibration for weights.\n    quant_desc_input = QuantDescriptor(calib_method='histogram')\n    quant_nn.QuantConv2d.set_default_quant_desc_input(quant_desc_input)\n    quant_nn.QuantLinear.set_default_quant_desc_input(quant_desc_input)\n\n    model = create_model(num_classes=args.num_classes)\n    assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n    model.load_state_dict(torch.load(args.weights, map_location='cpu'))\n    model.cuda()\n\n    # It is a bit slow since we collect histograms on CPU\n    with torch.no_grad():\n        collect_stats(model, val_loader, num_batches=1000 // batch_size)\n        compute_amax(model, method=\"percentile\", percentile=99.99)\n        # validate\n        
evaluate(model=model, data_loader=val_loader, epoch=0)\n\n    torch.save(model.state_dict(), \"quant_model_calibrated.pth\")\n\n    if args.qat:\n        # ########################### #\n        # Quantization Aware Training #\n        # ########################### #\n        pg = [p for p in model.parameters() if p.requires_grad]\n        optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)\n        # Scheduler(half of a cosine period)\n        lf = lambda x: (math.cos(x * math.pi / 2 / args.epochs)) * (1 - args.lrf) + args.lrf\n        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n        for epoch in range(args.epochs):\n            # train\n            train_one_epoch(model=model, optimizer=optimizer, data_loader=train_loader, epoch=epoch)\n\n            scheduler.step()\n\n            # validate\n            evaluate(model=model, data_loader=val_loader, epoch=epoch)\n\n    export_onnx(model, args.onnx_filename, args.onnx_bs)\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=5)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=0.0001)\n    parser.add_argument('--lrf', type=float, default=0.01)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 训练好的权重路径\n    parser.add_argument('--weights', type=str, default='./resNet(flower).pth',\n                        help='trained weights path')\n\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 
0 or 0,1 or cpu)')\n\n    parser.add_argument('--onnx-filename', default='resnet34.onnx', help='save onnx model filename')\n    parser.add_argument('--onnx-bs', default=1, help='save onnx model batch size')\n    parser.add_argument('--qat', type=bool, default=True, help='whether use quantization aware training')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/convert_tensorrt/convert_resnet34/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\".format(sum(every_class_num)))\n    print(\"{} images for 
training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).cuda()  # 累计损失\n    accu_num = torch.zeros(1).cuda()   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.cuda())\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.cuda()).sum()\n\n        loss = loss_function(pred, labels.cuda())\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).cuda()   # 累计预测正确的样本数\n    accu_loss = 
torch.zeros(1).cuda()  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.cuda())\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.cuda()).sum()\n\n        loss = loss_function(pred, labels.cuda())\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/pytorch_flask_service/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "deploying_service/deploying_pytorch/pytorch_flask_service/main.py",
    "content": "import os\nimport io\nimport json\nimport torch\nimport torchvision.transforms as transforms\nfrom PIL import Image\nfrom flask import Flask, jsonify, request, render_template\nfrom flask_cors import CORS\nfrom model import MobileNetV2\n\napp = Flask(__name__)\nCORS(app)  # 解决跨域问题\n\nweights_path = \"./MobileNetV2(flower).pth\"\nclass_json_path = \"./class_indices.json\"\nassert os.path.exists(weights_path), \"weights path does not exist...\"\nassert os.path.exists(class_json_path), \"class json path does not exist...\"\n\n# select device\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nprint(device)\n# create model\nmodel = MobileNetV2(num_classes=5).to(device)\n# load model weights\nmodel.load_state_dict(torch.load(weights_path, map_location=device))\n\nmodel.eval()\n\n# load class info\njson_file = open(class_json_path, 'rb')\nclass_indict = json.load(json_file)\n\n\ndef transform_image(image_bytes):\n    my_transforms = transforms.Compose([transforms.Resize(255),\n                                        transforms.CenterCrop(224),\n                                        transforms.ToTensor(),\n                                        transforms.Normalize(\n                                            [0.485, 0.456, 0.406],\n                                            [0.229, 0.224, 0.225])])\n    image = Image.open(io.BytesIO(image_bytes))\n    if image.mode != \"RGB\":\n        raise ValueError(\"input file does not RGB image...\")\n    return my_transforms(image).unsqueeze(0).to(device)\n\n\ndef get_prediction(image_bytes):\n    try:\n        tensor = transform_image(image_bytes=image_bytes)\n        outputs = torch.softmax(model.forward(tensor).squeeze(), dim=0)\n        prediction = outputs.detach().cpu().numpy()\n        template = \"class:{:<15} probability:{:.3f}\"\n        index_pre = [(class_indict[str(index)], float(p)) for index, p in enumerate(prediction)]\n        # sort probability\n        
index_pre.sort(key=lambda x: x[1], reverse=True)\n        text = [template.format(k, v) for k, v in index_pre]\n        return_info = {\"result\": text}\n    except Exception as e:\n        return_info = {\"result\": [str(e)]}\n    return return_info\n\n\n@app.route(\"/predict\", methods=[\"POST\"])\n@torch.no_grad()\ndef predict():\n    image = request.files[\"file\"]\n    img_bytes = image.read()\n    info = get_prediction(image_bytes=img_bytes)\n    return jsonify(info)\n\n\n@app.route(\"/\", methods=[\"GET\", \"POST\"])\ndef root():\n    return render_template(\"up.html\")\n\n\nif __name__ == '__main__':\n    app.run(host=\"0.0.0.0\", port=5000)\n\n\n\n\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/pytorch_flask_service/model.py",
    "content": "from torch import nn\nimport torch\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(nn.Sequential):\n    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):\n        padding = (kernel_size - 1) // 2\n        super(ConvBNReLU, self).__init__(\n            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),\n            nn.BatchNorm2d(out_channel),\n            nn.ReLU6(inplace=True)\n        )\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio):\n        super(InvertedResidual, self).__init__()\n        hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n\n        layers = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1))\n        layers.extend([\n            # 3x3 depthwise conv\n            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),\n            # 1x1 pointwise conv(linear)\n            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),\n            nn.BatchNorm2d(out_channel),\n        ])\n\n        self.conv = nn.Sequential(*layers)\n\n    def forward(self, x):\n        if self.use_shortcut:\n            return x + self.conv(x)\n        else:\n         
   return self.conv(x)\n\n\nclass MobileNetV2(nn.Module):\n    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):\n        super(MobileNetV2, self).__init__()\n        block = InvertedResidual\n        input_channel = _make_divisible(32 * alpha, round_nearest)\n        last_channel = _make_divisible(1280 * alpha, round_nearest)\n\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n\n        features = []\n        # conv1 layer\n        features.append(ConvBNReLU(3, input_channel, stride=2))\n        # building inverted residual residual blockes\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = _make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(block(input_channel, output_channel, stride, expand_ratio=t))\n                input_channel = output_channel\n        # building last several layers\n        features.append(ConvBNReLU(input_channel, last_channel, 1))\n        # combine feature layers\n        self.features = nn.Sequential(*features)\n\n        # building classifier\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.classifier = nn.Sequential(\n            nn.Dropout(0.2),\n            nn.Linear(last_channel, num_classes)\n        )\n\n        # weight initialization\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out')\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                
nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/pytorch_flask_service/requirements.txt",
    "content": "Flask==2.2.5\nFlask_Cors==3.0.9\nPillow\n"
  },
  {
    "path": "deploying_service/deploying_pytorch/pytorch_flask_service/templates/up.html",
    "content": "<!DOCTYPE html>\r\n<html>\r\n<head>\r\n    <title>HTML5上传图片并预览</title>\r\n    <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\r\n    <script src=\"{{ url_for('static', filename='js/jquery.min.js') }}\"></script>\r\n</head>\r\n<body>\r\n<!--<h3>请选择图片文件：PNG/JPG/JPEG/SVG/GIF</h3>-->\r\n<div style=\"text-align: left;margin-left:500px;margin-top:100px;\" >\r\n    <div style=\"float:left;\">\r\n        <a href=\"javascript:;\" class=\"file\">选择文件\r\n            <input type=\"file\" name=\"file\" id=\"file0\"><br>\r\n        </a>\r\n        <img src=\"\" id=\"img0\" style=\"margin-top:20px;width: 35rem;height: 30rem;\">\r\n    </div>\r\n    <div style=\"float:left;margin-left:50px;\">\r\n        <input type=\"button\" id=\"b0\" onclick=\"test()\" value=\"预测\">\r\n        <pre  id=\"out\" style=\"width:320px;height:50px;line-height: 50px;margin-top:20px;\"></pre>\r\n    </div>\r\n</div>\r\n\r\n<script type=\"text/javascript\">\r\n    $(\"#file0\").change(function(){\r\n        var objUrl = getObjectURL(this.files[0]) ;//获取文件信息\r\n        console.log(\"objUrl = \"+objUrl);\r\n        if (objUrl) {\r\n            $(\"#img0\").attr(\"src\", objUrl);\r\n        }\r\n    });\r\n\r\n    function test() {\r\n        var fileobj = $(\"#file0\")[0].files[0];\r\n        console.log(fileobj);\r\n        var form = new FormData();\r\n        form.append(\"file\", fileobj);\r\n        var out='';\r\n        var flower='';\r\n        $.ajax({\r\n            type: 'POST',\r\n            url: \"predict\",\r\n            data: form,\r\n            async: false,       //同步执行\r\n            processData: false, // 告诉jquery要传输data对象\r\n            contentType: false, //告诉jquery不需要增加请求头对于contentType的设置\r\n            success: function (arg) {\r\n            console.log(arg)\r\n            out = arg.result;\r\n        },error:function(){\r\n                console.log(\"后台处理错误\");\r\n            }\r\n    });\r\n\r\n        out.forEach(e=>{\r\n            
flower+=`<div style=\"border-bottom: 1px solid #CCCCCC;line-height: 60px;font-size:16px;\">${e}</div>`\r\n        });\r\n\r\n        document.getElementById(\"out\").innerHTML=flower;\r\n\r\n    }\r\n\r\n    function getObjectURL(file) {\r\n        var url = null;\r\n        if(window.createObjectURL!=undefined) {\r\n            url = window.createObjectURL(file) ;\r\n        }else if (window.URL!=undefined) { // mozilla(firefox)\r\n            url = window.URL.createObjectURL(file) ;\r\n        }else if (window.webkitURL!=undefined) { // webkit or chrome\r\n            url = window.webkitURL.createObjectURL(file) ;\r\n        }\r\n        return url ;\r\n    }\r\n</script>\r\n<style>\r\n    .file {\r\n        position: relative;\r\n        /*display: inline-block;*/\r\n        background: #CCC ;\r\n        border: 1px solid #CCC;\r\n        padding: 4px 4px;\r\n        overflow: hidden;\r\n        text-decoration: none;\r\n        text-indent: 0;\r\n        width:100px;\r\n        height:30px;\r\n        line-height: 30px;\r\n        border-radius: 5px;\r\n        color: #333;\r\n        font-size: 13px;\r\n\r\n    }\r\n    .file input {\r\n        position: absolute;\r\n        font-size: 13px;\r\n        right: 0;\r\n        top: 0;\r\n        opacity: 0;\r\n        border: 1px solid #333;\r\n        padding: 4px 4px;\r\n        overflow: hidden;\r\n        text-indent: 0;\r\n        width:100px;\r\n        height:30px;\r\n        line-height: 30px;\r\n        border-radius: 5px;\r\n        color: #FFFFFF;\r\n\r\n    }\r\n    #b0{\r\n        background: #1899FF;\r\n        border: 1px solid #CCC;\r\n        padding: 4px 10px;\r\n        overflow: hidden;\r\n        text-indent: 0;\r\n        width:60px;\r\n        height:28px;\r\n        line-height: 20px;\r\n        border-radius: 5px;\r\n        color: #FFFFFF;\r\n        font-size: 13px;\r\n    }\r\n\r\n    /*.gradient{*/\r\n\r\n        /*filter:alpha(opacity=100 finishopacity=50 style=1 
startx=0,starty=0,finishx=0,finishy=150) progid:DXImageTransform.Microsoft.gradient(startcolorstr=#fff,endcolorstr=#ccc,gradientType=0);*/\r\n        /*-ms-filter:alpha(opacity=100 finishopacity=50 style=1 startx=0,starty=0,finishx=0,finishy=150) progid:DXImageTransform.Microsoft.gradient(startcolorstr=#fff,endcolorstr=#ccc,gradientType=0);!*IE8*!*/\r\n        /*background:#1899FF; !* 一些不支持背景渐变的浏览器 *!*/\r\n        /*background:-moz-linear-gradient(top, #fff, #1899FF);*/\r\n        /*background:-webkit-gradient(linear, 0 0, 0 bottom, from(#fff), to(#ccc));*/\r\n        /*background:-o-linear-gradient(top, #fff, #ccc);*/\r\n    /*}*/\r\n</style>\r\n</body>\r\n</html>\r\n\r\n\r\n"
  },
  {
    "path": "deploying_service/pruning_model_pytorch/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "deploying_service/pruning_model_pytorch/main.py",
    "content": "import os\nimport torch\nfrom torchvision import transforms, datasets\nimport torch.nn.utils.prune as prune\nimport torch.nn.functional as F\nfrom tqdm import tqdm\nimport time\nfrom model import resnet34\n\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\ndata_transform = transforms.Compose([transforms.Resize(256),\n                                     transforms.CenterCrop(224),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\ndata_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\nimage_path = data_root + \"/data_set/flower_data/\"  # flower data set path\nbatch_size = 16\n\n\ndef validate_model(model: torch.nn.Module):\n    validate_dataset = datasets.ImageFolder(root=image_path + \"val\",\n                                            transform=data_transform)\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=2)\n\n    model.eval()\n    acc = 0.0  # accumulate accurate number / epoch\n    with torch.no_grad():\n        t1 = time.time()\n        for val_data in tqdm(validate_loader, desc=\"validate model accuracy.\"):\n            val_images, val_labels = val_data\n            outputs = model(val_images.to(device))  # eval model only have last output layer\n            predict_y = torch.max(outputs, dim=1)[1]\n            acc += torch.sum(torch.eq(predict_y, val_labels.to(device))).item()\n        val_accurate = acc / val_num\n        print('test_accuracy: %.3f, time:%.3f' % (val_accurate, time.time() - t1))\n\n    return val_accurate\n\n\ndef count_sparsity(model: torch.nn.Module, p=True):\n    sum_zeros_num = 0\n    sum_weights_num = 0\n    for name, module in 
model.named_modules():\n        if isinstance(module, torch.nn.Conv2d):\n            zeros_elements = torch.sum(torch.eq(module.weight, 0)).item()\n            weights_elements = module.weight.numel()\n\n            sum_zeros_num += zeros_elements\n            sum_weights_num += weights_elements\n            if p is True:\n                print(\"Sparsity in {}.weights {:.2f}%\".format(name, 100 * zeros_elements / weights_elements))\n    print(\"Global sparsity: {:.2f}%\".format(100 * sum_zeros_num / sum_weights_num))\n\n\ndef main():\n    weights_path = \"./resNet34.pth\"\n    model = resnet34(num_classes=5)\n    model.load_state_dict(torch.load(weights_path, map_location=device))\n    model.to(device)\n    # validate_model(model)\n    # module = model.conv1\n    # print(list(module.named_parameters()))\n    # # print(list(module.named_buffers()))\n    #\n    # # 裁剪50%的卷积核\n    # prune.ln_structured(module, name=\"weight\", amount=0.5, n=2, dim=0)\n    # print(list(module.weight))\n    # print(module.weight.shape)\n    # # print(list(module.named_buffers()))\n    #\n    # prune.remove(module, \"weight\")\n    # print(module.weight.shape)\n\n    # 收集所有需要裁剪的卷积核\n    parameters_to_prune = []\n    for name, module in model.named_modules():\n        if isinstance(module, torch.nn.Conv2d):\n            parameters_to_prune.append((module, \"weight\"))\n\n    # 对卷积核进行剪枝处理\n    prune.global_unstructured(parameters_to_prune,\n                              pruning_method=prune.L1Unstructured,\n                              amount=0.5)\n\n    # 统计剪枝比例\n    count_sparsity(model, p=False)\n\n    # 验证剪枝后的模型\n    validate_model(model)\n    # print(model)\n\n    # for name, module in model.named_modules():\n    #     if isinstance(module, torch.nn.Conv2d):\n    #         prune.remove(module, \"weight\")\n    # validate_model(model)\n\n    # torch.save(model.state_dict(), \"pruning_model.pth\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "deploying_service/pruning_model_pytorch/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "deploying_service/pruning_model_pytorch/predict.py",
    "content": "import torch\nfrom model import resnet34\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\nimport json\n\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\ndata_transform = transforms.Compose(\n    [transforms.Resize(256),\n     transforms.CenterCrop(224),\n     transforms.ToTensor(),\n     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n# load image\nimg = Image.open(\"../tulip.jpg\")\nplt.imshow(img)\n# [N, C, H, W]\nimg = data_transform(img)\n# expand batch dimension\nimg = torch.unsqueeze(img, dim=0)\n\n# read class_indict\ntry:\n    json_file = open('./class_indices.json', 'r')\n    class_indict = json.load(json_file)\nexcept Exception as e:\n    print(e)\n    exit(-1)\n\n# create model\nmodel = resnet34(num_classes=5)\n# load model weights\nmodel_weight_path = \"./resNet34.pth\"\nmodel.load_state_dict(torch.load(model_weight_path, map_location=device))\nmodel.eval()\nwith torch.no_grad():\n    # predict class\n    output = torch.squeeze(model(img))\n    predict = torch.softmax(output, dim=0)\n    predict_cla = torch.argmax(predict).numpy()\nprint(class_indict[str(predict_cla)], predict[predict_cla].numpy())\nplt.show()\n"
  },
  {
    "path": "deploying_service/pruning_model_pytorch/train.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torchvision import transforms, datasets\nimport json\nimport matplotlib.pyplot as plt\nimport os\nimport torch.optim as optim\nfrom model import resnet34, resnet101\n\n\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\nprint(device)\n\ndata_transform = {\n    \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                 transforms.RandomHorizontalFlip(),\n                                 transforms.ToTensor(),\n                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n    \"val\": transforms.Compose([transforms.Resize(256),\n                               transforms.CenterCrop(224),\n                               transforms.ToTensor(),\n                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n\ndata_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\nimage_path = data_root + \"/data_set/flower_data/\"  # flower data set path\n\ntrain_dataset = datasets.ImageFolder(root=image_path+\"train\",\n                                     transform=data_transform[\"train\"])\ntrain_num = len(train_dataset)\n\n# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\nflower_list = train_dataset.class_to_idx\ncla_dict = dict((val, key) for key, val in flower_list.items())\n# write dict into json file\njson_str = json.dumps(cla_dict, indent=4)\nwith open('class_indices.json', 'w') as json_file:\n    json_file.write(json_str)\n\nbatch_size = 16\ntrain_loader = torch.utils.data.DataLoader(train_dataset,\n                                           batch_size=batch_size, shuffle=True,\n                                           num_workers=0)\n\nvalidate_dataset = datasets.ImageFolder(root=image_path + \"val\",\n                                        transform=data_transform[\"val\"])\nval_num = 
len(validate_dataset)\nvalidate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                              batch_size=batch_size, shuffle=False,\n                                              num_workers=0)\n\nnet = resnet34()\n# load pretrain weights\nmodel_weight_path = \"./resnet34-pre.pth\"\nmissing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False)\n# for param in net.parameters():\n#     param.requires_grad = False\n# change fc layer structure\ninchannel = net.fc.in_features\nnet.fc = nn.Linear(inchannel, 5)\nnet.to(device)\n\nloss_function = nn.CrossEntropyLoss()\noptimizer = optim.Adam(net.parameters(), lr=0.0001)\n\nbest_acc = 0.0\nsave_path = './resNet34.pth'\nfor epoch in range(3):\n    # train\n    net.train()\n    running_loss = 0.0\n    for step, data in enumerate(train_loader, start=0):\n        images, labels = data\n        optimizer.zero_grad()\n        logits = net(images.to(device))\n        loss = loss_function(logits, labels.to(device))\n        loss.backward()\n        optimizer.step()\n\n        # print statistics\n        running_loss += loss.item()\n        # print train process\n        rate = (step+1)/len(train_loader)\n        a = \"*\" * int(rate * 50)\n        b = \".\" * int((1 - rate) * 50)\n        print(\"\\rtrain loss: {:^3.0f}%[{}->{}]{:.4f}\".format(int(rate*100), a, b, loss), end=\"\")\n    print()\n\n    # validate\n    net.eval()\n    acc = 0.0  # accumulate accurate number / epoch\n    with torch.no_grad():\n        for val_data in validate_loader:\n            val_images, val_labels = val_data\n            outputs = net(val_images.to(device))  # eval model only have last output layer\n            # loss = loss_function(outputs, test_labels)\n            predict_y = torch.max(outputs, dim=1)[1]\n            acc += (predict_y == val_labels.to(device)).sum().item()\n        val_accurate = acc / val_num\n        if val_accurate > best_acc:\n            best_acc = 
val_accurate\n            torch.save(net.state_dict(), save_path)\n        print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %\n              (epoch + 1, running_loss / step, val_accurate))\n\nprint('Finished Training')\n\n\n"
  },
  {
    "path": "others_project/draw_dilated_conv/main.py",
    "content": "import numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import LinearSegmentedColormap\n\n\ndef dilated_conv_one_pixel(center: (int, int),\n                           feature_map: np.ndarray,\n                           k: int = 3,\n                           r: int = 1,\n                           v: int = 1):\n    \"\"\"\n    膨胀卷积核中心在指定坐标center处时，统计哪些像素被利用到，\n    并在利用到的像素位置处加上增量v\n    Args:\n        center: 膨胀卷积核中心的坐标\n        feature_map: 记录每个像素使用次数的特征图\n        k: 膨胀卷积核的kernel大小\n        r: 膨胀卷积的dilation rate\n        v: 使用次数增量\n    \"\"\"\n    assert divmod(3, 2)[1] == 1\n\n    # left-top: (x, y)\n    left_top = (center[0] - ((k - 1) // 2) * r, center[1] - ((k - 1) // 2) * r)\n    for i in range(k):\n        for j in range(k):\n            feature_map[left_top[1] + i * r][left_top[0] + j * r] += v\n\n\ndef dilated_conv_all_map(dilated_map: np.ndarray,\n                         k: int = 3,\n                         r: int = 1):\n    \"\"\"\n    根据输出特征矩阵中哪些像素被使用以及使用次数，\n    配合膨胀卷积k和r计算输入特征矩阵哪些像素被使用以及使用次数\n    Args:\n        dilated_map: 记录输出特征矩阵中每个像素被使用次数的特征图\n        k: 膨胀卷积核的kernel大小\n        r: 膨胀卷积的dilation rate\n    \"\"\"\n    new_map = np.zeros_like(dilated_map)\n    for i in range(dilated_map.shape[0]):\n        for j in range(dilated_map.shape[1]):\n            if dilated_map[i][j] > 0:\n                dilated_conv_one_pixel((j, i), new_map, k=k, r=r, v=dilated_map[i][j])\n\n    return new_map\n\n\ndef plot_map(matrix: np.ndarray):\n    plt.figure()\n\n    c_list = ['white', 'blue', 'red']\n    new_cmp = LinearSegmentedColormap.from_list('chaos', c_list)\n    plt.imshow(matrix, cmap=new_cmp)\n\n    ax = plt.gca()\n    ax.set_xticks(np.arange(-0.5, matrix.shape[1], 1), minor=True)\n    ax.set_yticks(np.arange(-0.5, matrix.shape[0], 1), minor=True)\n\n    # 显示color bar\n    plt.colorbar()\n\n    # 在图中标注数量\n    thresh = 5\n    for x in range(matrix.shape[1]):\n        for y in range(matrix.shape[0]):\n            # 
注意这里的matrix[y, x]不是matrix[x, y]\n            info = int(matrix[y, x])\n            ax.text(x, y, info,\n                    verticalalignment='center',\n                    horizontalalignment='center',\n                    color=\"white\" if info > thresh else \"black\")\n    ax.grid(which='minor', color='black', linestyle='-', linewidth=1.5)\n    plt.show()\n    plt.close()\n\n\ndef main():\n    # bottom to top\n    dilated_rates = [1, 2, 3]\n    # init feature map\n    size = 31\n    m = np.zeros(shape=(size, size), dtype=np.int32)\n    center = size // 2\n    m[center][center] = 1\n    # print(m)\n    # plot_map(m)\n\n    for index, dilated_r in enumerate(dilated_rates[::-1]):\n        new_map = dilated_conv_all_map(m, r=dilated_r)\n        m = new_map\n    print(m)\n    plot_map(m)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "others_project/kmeans_anchors/main.py",
    "content": "import random\nimport numpy as np\nfrom tqdm import tqdm\nfrom scipy.cluster.vq import kmeans\n\nfrom read_voc import VOCDataSet\nfrom yolo_kmeans import k_means, wh_iou\n\n\ndef anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float):  # mutation fitness\n    r = wh[:, None] / k[None]\n    x = np.minimum(r, 1. / r).min(2)  # ratio metric\n    # x = wh_iou(wh, k)  # iou metric\n    best = x.max(1)\n    f = (best * (best > thr).astype(np.float32)).mean()  # fitness\n    bpr = (best > thr).astype(np.float32).mean()  # best possible recall\n    return f, bpr\n\n\ndef main(img_size=512, n=9, thr=0.25, gen=1000):\n    # 从数据集中读取所有图片的wh以及对应bboxes的wh\n    dataset = VOCDataSet(voc_root=\"/data\", year=\"2012\", txt_name=\"train.txt\")\n    im_wh, boxes_wh = dataset.get_info()\n\n    # 最大边缩放到img_size\n    im_wh = np.array(im_wh, dtype=np.float32)\n    shapes = img_size * im_wh / im_wh.max(1, keepdims=True)\n    wh0 = np.concatenate([l * s for s, l in zip(shapes, boxes_wh)])  # wh\n\n    # Filter 过滤掉小目标\n    i = (wh0 < 3.0).any(1).sum()\n    if i:\n        print(f'WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.')\n    wh = wh0[(wh0 >= 2.0).any(1)]  # 只保留wh都大于等于2个像素的box\n\n    # Kmeans calculation\n    # print(f'Running kmeans for {n} anchors on {len(wh)} points...')\n    # s = wh.std(0)  # sigmas for whitening\n    # k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance\n    # assert len(k) == n, print(f'ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')\n    # k *= s\n    k = k_means(wh, n)\n\n    # 按面积排序\n    k = k[np.argsort(k.prod(1))]  # sort small to large\n    f, bpr = anchor_fitness(k, wh, thr)\n    print(\"kmeans: \" + \" \".join([f\"[{int(i[0])}, {int(i[1])}]\" for i in k]))\n    print(f\"fitness: {f:.5f}, best possible recall: {bpr:.5f}\")\n\n    # Evolve\n    # 遗传算法(在kmeans的结果基础上变异mutation)\n    npr = np.random\n    f, sh, mp, s = anchor_fitness(k, wh, thr)[0], k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma\n    pbar = tqdm(range(gen), desc=f'Evolving anchors with Genetic Algorithm:')  # progress bar\n    for _ in pbar:\n        v = np.ones(sh)\n        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)\n            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)\n        kg = (k.copy() * v).clip(min=2.0)\n        fg, bpr = anchor_fitness(kg, wh, thr)\n        if fg > f:\n            f, k = fg, kg.copy()\n            pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'\n\n    # 按面积排序\n    k = k[np.argsort(k.prod(1))]  # sort small to large\n    print(\"genetic: \" + \" \".join([f\"[{int(i[0])}, {int(i[1])}]\" for i in k]))\n    print(f\"fitness: {f:.5f}, best possible recall: {bpr:.5f}\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "others_project/kmeans_anchors/plot_kmeans.py",
    "content": "import numpy as np\nfrom matplotlib import pyplot as plt\nnp.random.seed(0)\n\ncolors = np.array(['blue', 'black'])\n\n\ndef plot_clusters(data, cls, clusters, title=\"\"):\n    if cls is None:\n        c = [colors[0]] * data.shape[0]\n    else:\n        c = colors[cls].tolist()\n\n    plt.scatter(data[:, 0], data[:, 1], c=c)\n    for i, clus in enumerate(clusters):\n        plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150)\n    plt.title(title)\n    plt.show()\n    plt.close()\n\n\ndef distances(data, clusters):\n    xy1 = data[:, None]  # [N,1,2]\n    xy2 = clusters[None]  # [1,M,2]\n    d = np.sum(np.power(xy2 - xy1, 2), axis=-1)\n    return d\n\n\ndef k_means(data, k, dist=np.mean):\n    \"\"\"\n    k-means methods\n    Args:\n        data: 需要聚类的data\n        k: 簇数(聚成几类)\n        dist: 更新簇坐标的方法\n    \"\"\"\n    data_number = data.shape[0]\n    last_nearest = np.zeros((data_number,))\n\n    # init k clusters\n    clusters = data[np.random.choice(data_number, k, replace=False)]\n    print(f\"random cluster: \\n {clusters}\")\n    # plot\n    plot_clusters(data, None, clusters, \"random clusters\")\n\n    step = 0\n    while True:\n        d = distances(data, clusters)\n        current_nearest = np.argmin(d, axis=1)\n\n        # plot\n        plot_clusters(data, current_nearest, clusters, f\"step {step}\")\n        \n        if (last_nearest == current_nearest).all():\n            break  # clusters won't change\n        for cluster in range(k):\n            # update clusters\n            clusters[cluster] = dist(data[current_nearest == cluster], axis=0)\n        last_nearest = current_nearest\n        step += 1\n\n    return clusters\n\n\ndef main():\n    x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)]\n    x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)]\n\n    x = np.concatenate([x1, x2])\n    y = np.concatenate([y1, y2])\n\n    plt.scatter(x, y, c='blue')\n    plt.title(\"initial data\")\n    plt.show()\n  
  plt.close()\n\n    clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2)\n    print(f\"k-means cluster: \\n {clusters}\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "others_project/kmeans_anchors/read_voc.py",
    "content": "import os\nfrom tqdm import tqdm\nfrom lxml import etree\n\n\nclass VOCDataSet(object):\n    def __init__(self, voc_root, year=\"2012\", txt_name: str = \"train.txt\"):\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        self.root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        self.annotations_root = os.path.join(self.root, \"Annotations\")\n\n        # read train.txt or val.txt file\n        txt_path = os.path.join(self.root, \"ImageSets\", \"Main\", txt_name)\n        assert os.path.exists(txt_path), \"not found {} file.\".format(txt_name)\n\n        with open(txt_path) as read:\n            self.xml_list = [os.path.join(self.annotations_root, line.strip() + \".xml\")\n                             for line in read.readlines() if len(line.strip()) > 0]\n\n        # check file\n        assert len(self.xml_list) > 0, \"in '{}' file does not find any information.\".format(txt_path)\n        for xml_path in self.xml_list:\n            assert os.path.exists(xml_path), \"not found '{}' file.\".format(xml_path)\n\n    def __len__(self):\n        return len(self.xml_list)\n\n    def parse_xml_to_dict(self, xml):\n        \"\"\"\n        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n        Args:\n            xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n        Returns:\n            Python dictionary holding XML contents.\n        \"\"\"\n\n        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n            return {xml.tag: xml.text}\n\n        result = {}\n        for child in xml:\n            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息\n            if child.tag != 'object':\n                result[child.tag] = child_result[child.tag]\n            else:\n                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                    result[child.tag] = []\n                result[child.tag].append(child_result[child.tag])\n        return 
{xml.tag: result}\n\n    def get_info(self):\n        im_wh_list = []\n        boxes_wh_list = []\n        for xml_path in tqdm(self.xml_list, desc=\"read data info.\"):\n            # read xml\n            with open(xml_path) as fid:\n                xml_str = fid.read()\n            xml = etree.fromstring(xml_str)\n            data = self.parse_xml_to_dict(xml)[\"annotation\"]\n            im_height = int(data[\"size\"][\"height\"])\n            im_width = int(data[\"size\"][\"width\"])\n\n            wh = []\n            for obj in data[\"object\"]:\n                xmin = float(obj[\"bndbox\"][\"xmin\"])\n                xmax = float(obj[\"bndbox\"][\"xmax\"])\n                ymin = float(obj[\"bndbox\"][\"ymin\"])\n                ymax = float(obj[\"bndbox\"][\"ymax\"])\n                wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height])\n\n            if len(wh) == 0:\n                continue\n\n            im_wh_list.append([im_width, im_height])\n            boxes_wh_list.append(wh)\n\n        return im_wh_list, boxes_wh_list\n"
  },
  {
    "path": "others_project/kmeans_anchors/yolo_kmeans.py",
    "content": "import numpy as np\n\n\ndef wh_iou(wh1, wh2):\n    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2\n    wh1 = wh1[:, None]  # [N,1,2]\n    wh2 = wh2[None]  # [1,M,2]\n    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]\n    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)\n\n\ndef k_means(boxes, k, dist=np.median):\n    \"\"\"\n    yolo k-means methods\n    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py\n    Args:\n        boxes: 需要聚类的bboxes\n        k: 簇数(聚成几类)\n        dist: 更新簇坐标的方法(默认使用中位数，比均值效果略好)\n    \"\"\"\n    box_number = boxes.shape[0]\n    last_nearest = np.zeros((box_number,))\n    # np.random.seed(0)  # 固定随机数种子\n\n    # init k clusters\n    clusters = boxes[np.random.choice(box_number, k, replace=False)]\n\n    while True:\n        distances = 1 - wh_iou(boxes, clusters)\n        current_nearest = np.argmin(distances, axis=1)\n        if (last_nearest == current_nearest).all():\n            break  # clusters won't change\n        for cluster in range(k):\n            # update clusters\n            clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)\n\n        last_nearest = current_nearest\n\n    return clusters\n"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/create_imagenet_annotation.py",
    "content": "import os\nimport glob\n\nimage_dir = \"/home/w180662/my_project/my_github/data_set/flower_data/train\"\nassert os.path.exists(image_dir), \"image dir does not exist...\"\n\nimg_list = glob.glob(os.path.join(image_dir, \"*\", \"*.jpg\"))\nassert len(img_list) > 0, \"No images(.jpg) were found in image dir...\"\n\nclasses_info = os.listdir(image_dir)\nclasses_info.sort()\nclasses_dict = {}\n\n# create label file\nwith open(\"my_labels.txt\", \"w\") as lw:\n    # 注意，没有背景时，index要从0开始\n    for index, c in enumerate(classes_info, start=0):\n        txt = \"{}:{}\".format(index, c)\n        if index != len(classes_info):\n            txt += \"\\n\"\n        lw.write(txt)\n        classes_dict.update({c: str(index)})\nprint(\"create my_labels.txt successful...\")\n\n# create annotation file\nwith open(\"my_annotation.txt\", \"w\") as aw:\n    for img in img_list:\n        img_classes = classes_dict[img.split(\"/\")[-2]]\n        txt = \"{} {}\".format(img, img_classes)\n        if index != len(img_list):\n            txt += \"\\n\"\n        aw.write(txt)\nprint(\"create my_annotation.txt successful...\")\n"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/float32vsint8.py",
    "content": "import os\nimport time\nimport torch\nfrom torchvision import transforms, datasets\nfrom tqdm import tqdm\nimport numpy as np\nfrom openvino.inference_engine import IECore\n\ndevice = torch.device(\"cpu\")\n\n\ndef check_path_exist(path):\n    assert os.path.exists(path), \"{} does not exist...\".format(path)\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef openvino_model_speed(data_loader, val_num, xml_path, bin_path):\n    device = \"CPU\"\n    model_xml_path = xml_path\n    model_bin_path = bin_path\n    check_path_exist(model_xml_path)\n    check_path_exist(model_bin_path)\n\n    # inference engine\n    ie = IECore()\n\n    # read IR\n    net = ie.read_network(model=model_xml_path, weights=model_bin_path)\n    # load model\n    exec_net = ie.load_network(network=net, device_name=device)\n\n    # check supported layers for device\n    if device == \"CPU\":\n        supported_layers = ie.query_network(net, \"CPU\")\n        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]\n        if len(not_supported_layers) > 0:\n            print(\"Please try to specify cpu extensions library path in sample's command line parameters using -l \"\n                  \"or --cpu_extension command line argument\")\n            raise ValueError(\"device {} not support layers:\\n {}\".format(device,\n                                                                         \",\".join(not_supported_layers)))\n\n    # get input and output name\n    input_blob = next(iter(net.input_info))\n    output_blob = next(iter(net.outputs))\n\n    # set batch size\n    batch_size = 1\n    net.batch_size = batch_size\n\n    # read and pre-process input images\n    # n, c, h, w = net.input_info[input_blob].input_data.shape\n    forward_time = 0\n    acc = 0.0  # accumulate accurate number / epoch\n    for val_data in tqdm(data_loader, desc=\"Running onnx model...\"):\n      
  val_images, val_labels = val_data\n        input_dict = {input_blob: to_numpy(val_images)}\n        # start sync inference\n        t1 = time.time()\n        res = exec_net.infer(inputs=input_dict)\n        t2 = time.time()\n        forward_time += (t2 - t1)\n        outputs = res[output_blob]\n        predict_y = np.argmax(outputs, axis=1)\n        acc += (predict_y == to_numpy(val_labels)).sum()\n    val_accurate = acc / val_num\n    fps = round(val_num / forward_time, 1)\n    print(\"openvino info:\\nfps: {}/s  accuracy: {}\\n\".format(fps,\n                                                             val_accurate))\n\n\ndef main():\n    data_transform = transforms.Compose([transforms.Resize([224, 224]),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    data_root = \"/home/w180662/my_project/my_github\"  # get data root path\n    image_path = os.path.join(data_root, \"data_set/flower_data/\")  # flower data set path\n    check_path_exist(image_path)\n\n    batch_size = 1\n\n    validate_dataset = datasets.ImageFolder(root=image_path + \"val\",\n                                            transform=data_transform)\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size,\n                                                  shuffle=False,\n                                                  num_workers=4)\n\n    openvino_model_speed(validate_loader, val_num, \"./resnet34.xml\", \"./resnet34.bin\")\n    openvino_model_speed(validate_loader, val_num, \"./resnet34a.xml\", \"./resnet34a.bin\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/main.py",
    "content": "import sys\nimport cv2\nimport os\nimport glob\nimport json\nimport numpy as np\nimport logging as log\nfrom openvino.inference_engine import IECore\n\n\ndef main():\n    device = \"CPU\"\n    model_xml_path = \"./resnet34.xml\"\n    model_bin_path = \"./resnet34.bin\"\n    image_path = \"./\"\n    class_json_path = './class_indices.json'\n\n    # set log format\n    log.basicConfig(format=\"[ %(levelname)s ] %(message)s\", level=log.INFO, stream=sys.stdout)\n\n    assert os.path.exists(model_xml_path), \".xml file does not exist...\"\n    assert os.path.exists(model_bin_path), \".bin file does not exist...\"\n\n    # search *.jpg files\n    image_list = glob.glob(os.path.join(image_path, \"*.jpg\"))\n    assert len(image_list) > 0, \"no image(.jpg) be found...\"\n\n    # load class label\n    assert os.path.exists(class_json_path), \"class_json_path does not exist...\"\n    json_file = open(class_json_path, 'r')\n    class_indict = json.load(json_file)\n\n    # inference engine\n    ie = IECore()\n\n    # read IR\n    net = ie.read_network(model=model_xml_path, weights=model_bin_path)\n    # load model\n    exec_net = ie.load_network(network=net, device_name=device)\n\n    # check supported layers for device\n    if device == \"CPU\":\n        supported_layers = ie.query_network(net, \"CPU\")\n        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]\n        if len(not_supported_layers) > 0:\n            log.error(\"device {} not support layers:\\n {}\".format(device,\n                                                                  \",\".join(not_supported_layers)))\n            log.error(\"Please try to specify cpu extensions library path in sample's command line parameters using -l \"\n                      \"or --cpu_extension command line argument\")\n            sys.exit(1)\n\n    # get input and output name\n    input_blob = next(iter(net.input_info))\n    output_blob = next(iter(net.outputs))\n\n    # set 
batch size\n    batch_size = 1\n    net.batch_size = batch_size\n\n    # read and pre-process input images\n    n, c, h, w = net.input_info[input_blob].input_data.shape\n    # images = np.ndarray(shape=(n, c, h, w))\n    # inference every image\n    for i in range(len(image_list)):\n        image = cv2.imread(image_list[i])\n        if image.shape[:-1] != (h, w):\n            image = cv2.resize(image, (w, h))\n        # bgr(opencv default format) -> rgb\n        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n        # pre-process\n        image = (image / 255.).astype(np.float32)\n        image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n        # change data from HWC to CHW\n        image = image.transpose((2, 0, 1))\n        # add batch dimension\n        image = np.expand_dims(image, axis=0)\n\n        # start sync inference\n        res = exec_net.infer(inputs={input_blob: image})\n        prediction = np.squeeze(res[output_blob])\n        # print(prediction)\n\n        # np softmax process\n        prediction -= np.max(prediction, keepdims=True)  # 为了稳定地计算softmax概率， 一般会减掉最大元素\n        prediction = np.exp(prediction) / np.sum(np.exp(prediction), keepdims=True)\n        class_index = np.argmax(prediction, axis=0)\n        print(\"prediction: '{}'\\nclass:{}  probability:{}\\n\".format(image_list[i],\n                                                                    class_indict[str(class_index)],\n                                                                    np.around(prediction[class_index], 2)))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "others_project/openvinotest/openvino_cls_test/speed_test.py",
    "content": "import os\nimport time\nimport torch\nfrom torchvision import transforms, datasets\nfrom tqdm import tqdm\nimport onnx\nimport onnxruntime\nimport numpy as np\nfrom openvino.inference_engine import IECore\nfrom model import resnet34\n\ndevice = torch.device(\"cpu\")\n\n\ndef check_path_exist(path):\n    assert os.path.exists(path), \"{} does not exist...\".format(path)\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef pytorch_model_speed(data_loader, val_num):\n    net = resnet34(num_classes=5)\n    # load weights\n    model_weight_path = \"./resNet34.pth\"\n    check_path_exist(model_weight_path)\n    net.load_state_dict(torch.load(model_weight_path, map_location=device), strict=False)\n    net.eval()\n    test_data = torch.rand((1, 3, 224, 224))\n    net(test_data.to(device))\n\n    forward_time = 0\n    acc = 0.0  # accumulate accurate number / epoch\n    with torch.no_grad():\n        for val_data in tqdm(data_loader, desc=\"Running pytorch model...\"):\n            val_images, val_labels = val_data\n            t1 = time.time()\n            outputs = net(val_images.to(device))  # eval model only have last output layer\n            t2 = time.time()\n            forward_time += (t2 - t1)\n            predict_y = torch.max(outputs, dim=1)[1]\n            acc += (predict_y == val_labels.to(device)).sum().item()\n        val_accurate = acc / val_num\n    fps = round(val_num / forward_time, 1)\n    print(\"pytorch info:\\nfps: {}/s  accuracy: {}\\n\".format(fps,\n                                                            val_accurate))\n    return fps, val_accurate, \"Pytorch(not opt)\"\n\n\ndef onnx_model_speed(data_loader, val_num):\n    # check onnx model\n    onnx_path = \"./resnet34.onnx\"\n    check_path_exist(onnx_path)\n    onnx_model = onnx.load(onnx_path)\n    onnx.checker.check_model(onnx_model)\n\n    ort_session = onnxruntime.InferenceSession(onnx_path)\n    
input_name = ort_session.get_inputs()[0].name\n\n    forward_time = 0\n    acc = 0.0  # accumulate accurate number / epoch\n    for val_data in tqdm(data_loader, desc=\"Running onnx model...\"):\n        val_images, val_labels = val_data\n        input_dict = {input_name: to_numpy(val_images)}\n        t1 = time.time()\n        outputs = ort_session.run(None, input_dict)\n        t2 = time.time()\n        forward_time += (t2 - t1)\n        outputs = outputs[0]\n        predict_y = np.argmax(outputs, axis=1)\n        acc += (predict_y == to_numpy(val_labels)).sum()\n    val_accurate = acc / val_num\n    fps = round(val_num / forward_time, 1)\n    print(\"onnx info:\\nfps: {}/s  accuracy: {}\\n\".format(fps,\n                                                         val_accurate))\n    return fps, val_accurate, \"ONNX\"\n\n\ndef openvino_model_speed(data_loader, val_num):\n    device = \"CPU\"\n    model_xml_path = \"./resnet34r.xml\"\n    model_bin_path = \"./resnet34r.bin\"\n    check_path_exist(model_xml_path)\n    check_path_exist(model_bin_path)\n\n    # inference engine\n    ie = IECore()\n\n    # read IR\n    net = ie.read_network(model=model_xml_path, weights=model_bin_path)\n    # load model\n    exec_net = ie.load_network(network=net, device_name=device)\n\n    # check supported layers for device\n    if device == \"CPU\":\n        supported_layers = ie.query_network(net, \"CPU\")\n        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]\n        if len(not_supported_layers) > 0:\n            print(\"Please try to specify cpu extensions library path in sample's command line parameters using -l \"\n                  \"or --cpu_extension command line argument\")\n            raise ValueError(\"device {} not support layers:\\n {}\".format(device,\n                                                                         \",\".join(not_supported_layers)))\n\n    # get input and output name\n    input_blob = 
next(iter(net.input_info))\n    output_blob = next(iter(net.outputs))\n\n    # set batch size\n    batch_size = 1\n    net.batch_size = batch_size\n\n    # read and pre-process input images\n    # n, c, h, w = net.input_info[input_blob].input_data.shape\n    forward_time = 0\n    acc = 0.0  # accumulate accurate number / epoch\n    for val_data in tqdm(data_loader, desc=\"Running onnx model...\"):\n        val_images, val_labels = val_data\n        input_dict = {input_blob: to_numpy(val_images)}\n        # start sync inference\n        t1 = time.time()\n        res = exec_net.infer(inputs=input_dict)\n        t2 = time.time()\n        forward_time += (t2 - t1)\n        outputs = res[output_blob]\n        predict_y = np.argmax(outputs, axis=1)\n        acc += (predict_y == to_numpy(val_labels)).sum()\n    val_accurate = acc / val_num\n    fps = round(val_num / forward_time, 1)\n    print(\"openvino info:\\nfps: {}/s  accuracy: {}\\n\".format(fps,\n                                                             val_accurate))\n\n\ndef main():\n    data_transform = transforms.Compose([transforms.Resize([224, 224]),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    data_root = \"/home/w180662/my_project/my_github\"  # get data root path\n    image_path = os.path.join(data_root, \"data_set/flower_data/\")  # flower data set path\n    check_path_exist(image_path)\n\n    batch_size = 1\n\n    validate_dataset = datasets.ImageFolder(root=image_path + \"val\",\n                                            transform=data_transform)\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size,\n                                                  shuffle=False,\n                                                  num_workers=4)\n\n    
pytorch_model_speed(validate_loader, val_num)\n    onnx_model_speed(validate_loader, val_num)\n    openvino_model_speed(validate_loader, val_num)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "others_project/readPbFile/README.md",
    "content": "该项目用于读取冻结后的pb文件并进行预测  \n使用步骤：   \n（1）准备好需要使用的pb冻结文件，pbtxt标签文件，测试用的图片  \n（2）修改info.config文件中的相关信息  \n\n![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example1.jpg)     \n![Example image](https://tensorflowob/raw/master/object_detection/readPbFile/example2.jpg)"
  },
  {
    "path": "others_project/readPbFile/pascal_label_map.pbtxt",
    "content": "item {\n  id: 1\n  name: 'aeroplane'\n}\n\nitem {\n  id: 2\n  name: 'bicycle'\n}\n\nitem {\n  id: 3\n  name: 'bird'\n}\n\nitem {\n  id: 4\n  name: 'boat'\n}\n\nitem {\n  id: 5\n  name: 'bottle'\n}\n\nitem {\n  id: 6\n  name: 'bus'\n}\n\nitem {\n  id: 7\n  name: 'car'\n}\n\nitem {\n  id: 8\n  name: 'cat'\n}\n\nitem {\n  id: 9\n  name: 'chair'\n}\n\nitem {\n  id: 10\n  name: 'cow'\n}\n\nitem {\n  id: 11\n  name: 'diningtable'\n}\n\nitem {\n  id: 12\n  name: 'dog'\n}\n\nitem {\n  id: 13\n  name: 'horse'\n}\n\nitem {\n  id: 14\n  name: 'motorbike'\n}\n\nitem {\n  id: 15\n  name: 'person'\n}\n\nitem {\n  id: 16\n  name: 'pottedplant'\n}\n\nitem {\n  id: 17\n  name: 'sheep'\n}\n\nitem {\n  id: 18\n  name: 'sofa'\n}\n\nitem {\n  id: 19\n  name: 'train'\n}\n\nitem {\n  id: 20\n  name: 'tvmonitor'\n}\n"
  },
  {
    "path": "others_project/readPbFile/readPb.py",
    "content": "import tensorflow as tf\nimport configparser\nfrom distutils.version import StrictVersion\nimport cv2\nimport glob\nfrom using_function import draw_box, read_pbtxt, get_inAndout_tensor, convert_type, read_image\n\nif StrictVersion(tf.__version__) < StrictVersion('1.12.0'):\n    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n\n# 读取参数配置文件\nconf = configparser.ConfigParser()\nconf.read('info.config')\npath_to_frozen_graph = conf.get('tensorflow', 'path_to_frozen_graph')\npath_to_labels = conf.get('tensorflow', 'path_to_labels')\npath_to_images = conf.get('tensorflow', 'path_to_images')\nprobability_thresh = float(conf.get('tensorflow', 'probability_thresh'))\n\n# 读取pbtxt标签信息\ncategory_index = read_pbtxt(path_to_labels)\n\ndetection_graph = tf.Graph()\nwith detection_graph.as_default():\n    od_graph_def = tf.GraphDef()\n    with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid:\n        serialized_graph = fid.read()\n        od_graph_def.ParseFromString(serialized_graph)\n        tf.import_graph_def(od_graph_def, name='')\n\nwith detection_graph.as_default():\n    with tf.Session() as sess:\n        # Get handles to input and output tensors\n        tensor_dict, image_tensor = get_inAndout_tensor()\n        test_image_paths = glob.glob(path_to_images)\n        for image_path in test_image_paths:\n            image_BGR, image_np_expanded = read_image(image_path)\n\n            # Run inference\n            output_dict = sess.run(tensor_dict,\n                                   feed_dict={image_tensor: image_np_expanded})\n            # all outputs are float32 numpy arrays, so convert types as appropriate\n            convert_type(output_dict)\n\n            draw_box(image_BGR,\n                     output_dict['detection_boxes'],\n                     output_dict['detection_classes'],\n                     output_dict['detection_scores'],\n                     category_index,\n                     
thresh=probability_thresh,\n                     line_thickness=5)\n            cv2.namedWindow(\"prediction\", cv2.WINDOW_AUTOSIZE)\n            cv2.imshow(\"prediction\", image_BGR)\n            cv2.waitKey(0)\n"
  },
  {
    "path": "others_project/readPbFile/test_images/image_info.txt",
    "content": "\nImage provenance:\nimage1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg\nimage2.jpg: Michael Miley,\n  https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4\n\n"
  },
  {
    "path": "others_project/readPbFile/using_function.py",
    "content": "import collections\nimport six\nimport PIL.Image as Image\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nimport numpy as np\nimport tensorflow as tf\nimport cv2\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef 
filter_low_thresh(boxes, scores, classes, category_index, thresh, box_to_display_str_map, box_to_color_map):\n    for i in range(boxes.shape[0]):\n        if scores[i] > thresh:\n            box = tuple(boxes[i].tolist())\n            if classes[i] in six.viewkeys(category_index):\n                class_name = category_index[classes[i]]['name']\n            else:\n                class_name = 'N/A'\n            display_str = str(class_name)\n            display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))\n            box_to_display_str_map[box].append(display_str)\n            box_to_color_map[box] = STANDARD_COLORS[\n                classes[i] % len(STANDARD_COLORS)]\n        else:\n            break  # 网络输出概率已经排序过，当遇到一个不满足后面的肯定不满足\n\n\ndef draw_text(draw, box_to_display_str_map, box, left, right, top, bottom, color):\n    try:\n        font = ImageFont.truetype('arial.ttf', 24)\n    except IOError:\n        font = ImageFont.load_default()\n\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str_heights = [font.getsize(ds)[1] for ds in box_to_display_str_map[box]]\n    # Each display_str has a top and bottom margin of 0.05x.\n    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n\n    if top > total_display_str_height:\n        text_bottom = top\n    else:\n        text_bottom = bottom + total_display_str_height\n    # Reverse list and print from bottom to top.\n    for display_str in box_to_display_str_map[box][::-1]:\n        text_width, text_height = font.getsize(display_str)\n        margin = np.ceil(0.05 * text_height)\n        draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n                        (left + text_width, text_bottom)], fill=color)\n        draw.text((left + margin, text_bottom - text_height - margin),\n                  display_str,\n             
     fill='black',\n                  font=font)\n        text_bottom -= text_height - 2 * margin\n\n\ndef draw_box(image, boxes, classes, scores, category_index, thresh=0.5, line_thickness=8):\n    box_to_display_str_map = collections.defaultdict(list)\n    box_to_color_map = collections.defaultdict(str)\n\n    filter_low_thresh(boxes, scores, classes, category_index, thresh, box_to_display_str_map, box_to_color_map)\n\n    # Draw all boxes onto image.\n    for box, color in box_to_color_map.items():\n        ymin, xmin, ymax, xmax = box\n        image_pil = Image.fromarray(np.uint8(image)).convert('RGB')\n        draw = ImageDraw.Draw(image_pil)\n        im_width, im_height = image_pil.size\n        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n                                      ymin * im_height, ymax * im_height)\n        draw.line([(left, top), (left, bottom), (right, bottom),\n                   (right, top), (left, top)], width=line_thickness, fill=color)\n        draw_text(draw, box_to_display_str_map, box, left, right, top, bottom, color)\n        np.copyto(image, np.array(image_pil))\n    return image\n\n\ndef read_pbtxt(filename):\n    category_index = {}\n    with open(filename, 'r') as reader:\n        txt = str(reader.read())\n        txt = txt.replace(\" \", \"\").replace(\"{\", \"\").replace(\"}\", \"\")\n        txtList = txt.split(\"item\")[1:]\n        for index, line in enumerate(txtList):\n            line = line.strip(\"\\n\").split('\\n')\n            category_index[index + 1] = {'id': int(line[0][3:]), 'name': line[1][6: -1]}\n    return category_index\n\n\ndef get_inAndout_tensor():\n    ops = tf.get_default_graph().get_operations()\n    all_tensor_names = {output.name for op in ops for output in op.outputs}\n    tensor_dict = {}\n    outputKeys = ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes']\n    for key in outputKeys:\n        tensor_name = key + ':0'\n        if tensor_name in 
all_tensor_names:\n            tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n                tensor_name)\n    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n    return tensor_dict, image_tensor\n\n\ndef convert_type(output_dict):\n    output_dict['num_detections'] = int(output_dict['num_detections'][0])\n    output_dict['detection_classes'] = output_dict[\n        'detection_classes'][0].astype(np.int64)\n    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n    output_dict['detection_scores'] = output_dict['detection_scores'][0]\n\n\ndef read_image(image_path):\n    image_BGR = cv2.imread(image_path)\n    image_RGB = np.zeros_like(image_BGR)\n    cv2.cvtColor(image_BGR, cv2.COLOR_BGR2RGB, image_RGB)\n    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n    image_np_expanded = np.expand_dims(image_RGB, axis=0)\n    return image_BGR, image_np_expanded\n"
  },
  {
    "path": "others_project/textcnnKeras/dataGenerator.py",
    "content": "from tensorflow import keras\nfrom sklearn.preprocessing import LabelEncoder\nimport random\n\n\ndef content2idList(content, word2id_dict):\n    \"\"\"\n    该函数的目的是将文本转换为对应的汉字数字id\n    content：输入的文本\n    word2id_dict：用于查找转换的字典\n    \"\"\"\n    idList = []\n    for word in content:  # 遍历每一个汉字\n        if word in word2id_dict:  # 当刚文字在字典中时才进行转换，否则丢弃\n            idList.append(word2id_dict[word])\n    return idList\n\n\ndef generatorInfo(batch_size, seq_length, num_classes, file_name):\n    \"\"\"\n    batch_size：生成数据的batch size\n    seq_length：输入文字序列长度\n    num_classes：文本的类别数\n    file_name：读取文件的路径\n    \"\"\"\n    # 读取词库文件\n    with open('./cnews/cnews.vocab.txt', encoding='utf-8') as file:\n        vocabulary_list = [k.strip() for k in file.readlines()]\n    word2id_dict = dict([(b, a) for a, b in enumerate(vocabulary_list)])\n\n    # 读取文本文件\n    with open(file_name, encoding='utf-8') as file:\n        line_list = [k.strip() for k in file.readlines()]\n        data_label_list = []  # 创建数据标签文件\n        data_content_list = []  # 创建数据文本文件\n        for k in line_list:\n            t = k.split(maxsplit=1)\n            data_label_list.append(t[0])\n            data_content_list.append(t[1])\n\n    data_id_list = [content2idList(content, word2id_dict) for content in data_content_list]  # 将文本数据转换拿为数字序列\n    # 将list数据类型转换为ndarray数据类型，并按照seq_length长度去统一化文本序列长度，\n    # 若长度超过设定值将其截断保留后半部分，若长度不足前面补0\n    data_X = keras.preprocessing.sequence.pad_sequences(data_id_list, seq_length, truncating='pre')\n    labelEncoder = LabelEncoder()\n    data_y = labelEncoder.fit_transform(data_label_list)  # 将文字标签转为数字标签\n    data_Y = keras.utils.to_categorical(data_y, num_classes)  # 将数字标签转为one-hot标签\n\n    while True:\n        selected_index = random.sample(list(range(len(data_y))), k=batch_size)  # 按照数据集合的长度随机抽取batch_size个数据的index\n        batch_X = data_X[selected_index]  # 随机抽取的文本信息（数字化序列）\n        batch_Y = data_Y[selected_index]  # 随机抽取的标签信息（one-hot编码）\n        yield 
(batch_X, batch_Y)\n\n"
  },
  {
    "path": "others_project/textcnnKeras/data_link.txt",
    "content": "baidupan_url = \"https://pan.baidu.com/s/1w452Z5eXbQSDQfgEBNUdlg\"\nextract_code = \"8cwv\""
  },
  {
    "path": "others_project/textcnnKeras/main.py",
    "content": "from models import text_cnn, simpleNet, text_cnn_V2\nfrom dataGenerator import generatorInfo\nfrom tensorflow import keras\n\nvocab_size = 5000\nseq_length = 600\nembedding_dim = 64\nnum_classes = 10\ntrainBatchSize = 64\nevalBatchSize = 200\nsteps_per_epoch = 50000 // trainBatchSize\nepoch = 2\nworkers = 4\nlogdir = './log/'\ntrainFileName = './cnews/cnews.train.txt'\nevalFileName = './cnews/cnews.test.txt'\n\nmodel = text_cnn(seq_length=seq_length,\n                 vocab_size=vocab_size,\n                 embedding_dim=embedding_dim,\n                 num_cla=num_classes,\n                 kernelNum=64)\n\ntrainGenerator = generatorInfo(trainBatchSize, seq_length, num_classes, trainFileName)\nevalGenerator = generatorInfo(evalBatchSize, seq_length, num_classes, evalFileName)\n\n\ndef lrSchedule(epoch):\n    lr = keras.backend.get_value(model.optimizer.lr)\n    if epoch % 1 == 0 and epoch != 0:\n        lr = lr * 0.5\n    return lr\n\n\nlog = keras.callbacks.TensorBoard(log_dir=logdir, update_freq=500)\nreduceLr = keras.callbacks.LearningRateScheduler(lrSchedule, verbose=1)\n\nmodel.fit_generator(generator=trainGenerator,\n                    steps_per_epoch=steps_per_epoch,\n                    epochs=epoch,\n                    validation_data=evalGenerator,\n                    validation_steps=10,\n                    workers=1,\n                    callbacks=[log, reduceLr])\nmodel.save_weights(logdir + 'train_weight.h5')\n"
  },
  {
    "path": "others_project/textcnnKeras/models.py",
    "content": "from tensorflow import keras\n\n\ndef text_cnn(seq_length, vocab_size, embedding_dim, num_cla, kernelNum):\n    \"\"\"\n    :param seq_length:  输入的文字序列长度\n    :param vocab_size:  词汇库的大小\n    :param embedding_dim:  生成词向量的特征维度\n    :param num_cla: 分类类别\n    :return: keras model\n    \"\"\"\n    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')\n    embOut = keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(inputX)\n    # 分别使用长度为3,4,5的词窗去执行卷积\n    conv1 = keras.layers.Conv1D(kernelNum, 3, padding='valid', strides=1, activation='relu')(embOut)\n    maxp1 = keras.layers.MaxPool1D(pool_size=int(conv1.shape[1]))(conv1)\n\n    conv2 = keras.layers.Conv1D(kernelNum, 4, padding='valid', strides=1, activation='relu')(embOut)\n    maxp2 = keras.layers.MaxPool1D(pool_size=int(conv2.shape[1]))(conv2)\n\n    conv3 = keras.layers.Conv1D(kernelNum, 5, padding='valid', strides=1, activation='relu')(embOut)\n    maxp3 = keras.layers.MaxPool1D(pool_size=int(conv3.shape[1]))(conv3)\n\n    # 合并三个模型的输出向量\n    cnn = keras.layers.Concatenate(axis=-1)([maxp1, maxp2, maxp3])\n    flat = keras.layers.Flatten()(cnn)\n    dense1 = keras.layers.Dense(128)(flat)\n    drop = keras.layers.Dropout(0.25)(dense1)\n    denseRelu = keras.layers.ReLU()(drop)\n    predictY = keras.layers.Dense(num_cla, activation='softmax')(denseRelu)\n    # 编译模型\n    model = keras.models.Model(inputs=inputX, outputs=predictY)\n    # 指定loss的计算方法，设置优化器，编译模型\n    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n    return model\n\n\ndef text_cnn_V2(seq_length, vocab_size, embedding_dim, num_cla, kernelNum=128):\n    \"\"\"\n    :param seq_length:  输入的文字序列长度\n    :param vocab_size:  词汇库的大小\n    :param embedding_dim:  生成词向量的特征维度\n    :param num_cla: 分类类别\n    :return: keras model\n    \"\"\"\n    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')\n    embOut = keras.layers.Embedding(vocab_size, embedding_dim, 
input_length=seq_length)(inputX)\n    # 分别使用长度为3,4,5的词窗去执行卷积\n    conv1 = keras.layers.Conv1D(kernelNum, 3, padding='valid', strides=1, activation='relu')(embOut)\n    maxp1 = keras.layers.SeparableConv1D(filters=int(conv1.shape[2]), kernel_size=int(conv1.shape[1]))(conv1)\n\n    conv2 = keras.layers.Conv1D(kernelNum, 4, padding='valid', strides=1, activation='relu')(embOut)\n    maxp2 = keras.layers.SeparableConv1D(filters=int(conv2.shape[2]), kernel_size=int(conv2.shape[1]))(conv2)\n\n    conv3 = keras.layers.Conv1D(kernelNum, 5, padding='valid', strides=1, activation='relu')(embOut)\n    maxp3 = keras.layers.SeparableConv1D(filters=int(conv3.shape[2]), kernel_size=int(conv3.shape[1]))(conv3)\n\n    # 合并三个模型的输出向量\n    cnn = keras.layers.Concatenate(axis=2)([maxp1, maxp2, maxp3])\n    bn = keras.layers.BatchNormalization()(cnn)\n    conv4 = keras.layers.Conv1D(num_cla, kernel_size=int(cnn.shape[1]), activation='softmax')(bn)\n    # predictY = keras.layers.Lambda(keras.backend.squeeze, arguments={'axis': 1})(conv4)\n    predictY = keras.layers.Flatten()(conv4)\n    # 编译模型\n    model = keras.models.Model(inputs=inputX, outputs=predictY)\n    # 指定loss的计算方法，设置优化器，编译模型\n    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n    return model\n\n\ndef simpleNet(seq_length, vocab_size, embedding_dim, num_cla, kernelNum=128):\n    inputX = keras.layers.Input(shape=(seq_length,), dtype='int32')\n    embOut = keras.layers.Embedding(vocab_size, embedding_dim, input_length=seq_length)(inputX)\n    # 使用长度为5的词窗去执行卷积\n    conv1 = keras.layers.Conv1D(kernelNum, 5, padding='same', strides=1)(embOut)\n    maxp1 = keras.layers.MaxPool1D(pool_size=int(conv1.shape[1]))(conv1)\n    flat = keras.layers.Flatten()(maxp1)\n    dense1 = keras.layers.Dense(128)(flat)\n    drop = keras.layers.Dropout(0.25)(dense1)\n    denseRelu = keras.layers.ReLU()(drop)\n    predictY = keras.layers.Dense(num_cla, activation='softmax')(denseRelu)\n    # 编译模型\n    model = 
keras.models.Model(inputs=inputX, outputs=predictY)\n    # 指定loss的计算方法，设置优化器，编译模型\n    model.compile(optimizer=keras.optimizers.Adam(lr=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])\n    return model\n"
  },
  {
    "path": "others_project/trans_widerface_to_xml/create_xml.py",
    "content": "import copy\r\nimport os\r\nfrom xml.dom import minidom as dom\r\n\r\n\r\nclass XMLGenerator(object):\r\n    def __init__(self, xml_name: str):\r\n        self.doc = dom.Document()\r\n        self.xml_name = xml_name\r\n\r\n    def create_append_node(self, node_name, root_node=None):\r\n        \"\"\"创建一个新node并将node添加到root_node下\"\"\"\r\n        new_node = self.doc.createElement(node_name)\r\n        if root_node is not None:\r\n            root_node.appendChild(new_node)\r\n        else:\r\n            self.doc.appendChild(new_node)\r\n        return new_node\r\n\r\n    def create_text_node(self, node_name, node_value, root_node):\r\n        \"\"\"\r\n        创建一个新node，然后在该node中添加一个text_node，\r\n        最后将node添加到root_node下\r\n        \"\"\"\r\n        new_node = self.doc.createElement(node_name)\r\n        node_data = self.doc.createTextNode(node_value)\r\n        new_node.appendChild(node_data)\r\n        root_node.appendChild(new_node)\r\n\r\n    def create_object_node(self, info_dict: dict = None, root_node: str = None):\r\n        if (info_dict is None) or (root_node is None):\r\n            return\r\n\r\n        object_node = self.create_append_node('object', root_node)\r\n        box_node = self.create_append_node('bndbox', object_node)\r\n        self.create_text_node(\"xmin\", info_dict.pop(\"xmin\"), box_node)\r\n        self.create_text_node(\"ymin\", info_dict.pop(\"ymin\"), box_node)\r\n        self.create_text_node(\"xmax\", info_dict.pop(\"xmax\"), box_node)\r\n        self.create_text_node(\"ymax\", info_dict.pop(\"ymax\"), box_node)\r\n\r\n        for k, v in info_dict.items():\r\n            self.create_text_node(k, v, object_node)\r\n\r\n    def save_xml(self):\r\n        f = open(self.xml_name, \"w\")\r\n        self.doc.writexml(f, addindent=\"\\t\", newl=\"\\n\")\r\n        f.close()\r\n\r\n\r\ndef create_pascal_voc_xml(filename: str = None,\r\n                          years: str = 'VOC2012',\r\n                          
source_dict: dict = None,\r\n                          objects_list: list = None,\r\n                          im_shape: tuple = None,\r\n                          save_root: str = os.getcwd(),\r\n                          cover: bool = False):\r\n    if not (filename and source_dict and objects_list and im_shape):\r\n        return\r\n\r\n    # 0--Parade/0_Parade_marchingband_1_849.jpg -> 0_Parade_marchingband_1_849.xml\r\n    xml_name = filename.split(os.sep)[-1].split(\".\")[0] + '.xml'\r\n    xml_full_path = os.path.join(save_root, xml_name)\r\n    if os.path.exists(xml_full_path) and (cover is False):\r\n        print(f\"{xml_full_path} already exist, skip.\")\r\n        return\r\n\r\n    xml_generator = XMLGenerator(xml_full_path)\r\n\r\n    # xml root node\r\n    node_root = xml_generator.create_append_node('annotation')\r\n    xml_generator.create_text_node(node_name='folder', node_value=years, root_node=node_root)\r\n    xml_generator.create_text_node(node_name='filename', node_value=filename, root_node=node_root)\r\n\r\n    # source\r\n    node_source = xml_generator.create_append_node('source', root_node=node_root)\r\n    xml_generator.create_text_node(node_name='database', node_value=source_dict['database'], root_node=node_source)\r\n    xml_generator.create_text_node(node_name='annotation', node_value=source_dict['annotation'], root_node=node_source)\r\n    xml_generator.create_text_node(node_name='image', node_value=source_dict['image'], root_node=node_source)\r\n\r\n    # size\r\n    node_size = xml_generator.create_append_node('size', root_node=node_root)\r\n    xml_generator.create_text_node(node_name='height', node_value=str(im_shape[0]), root_node=node_size)\r\n    xml_generator.create_text_node(node_name='width', node_value=str(im_shape[1]), root_node=node_size)\r\n    xml_generator.create_text_node(node_name='depth', node_value=str(im_shape[2]), root_node=node_size)\r\n\r\n    # segmented\r\n    
xml_generator.create_text_node(node_name='segmented', node_value='0', root_node=node_root)\r\n\r\n    # object\r\n    for i, ob in enumerate(objects_list):\r\n        xml_generator.create_object_node(info_dict=ob, root_node=node_root)\r\n\r\n    # XML write\r\n    xml_generator.save_xml()\r\n\r\n\r\ndef create_xml_test():\r\n    objects = []\r\n    ob = {'name': 'person', 'pose': 'Unspecified', 'truncated': '0', 'difficult': '0',\r\n          'xmin': '174', 'ymin': '101', 'xmax': '349', 'ymax': '351'}\r\n    objects.append(ob)\r\n    objects.append(copy.deepcopy(ob))\r\n\r\n    years = 'VOC2012'\r\n    filename = 'test.jpg'\r\n    source_dict = {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr'}\r\n    im_width = '500'\r\n    im_height = '700'\r\n    im_depth = '3'\r\n    im_shape = (im_width, im_height, im_depth)\r\n    create_pascal_voc_xml(filename=filename, years=years,\r\n                          source_dict=source_dict, objects_list=objects,\r\n                          im_shape=im_shape)\r\n"
  },
  {
    "path": "others_project/trans_widerface_to_xml/main.py",
    "content": "import os\n\nfrom tqdm import tqdm\nimport cv2\nfrom create_xml import create_pascal_voc_xml\n\n\ndef create_xml(labels: list, img_root: str, img_path: str, save_root: str) -> bool:\n    source_dict = {'database': 'The WIDERFACE2017 Database',\n                   'annotation': 'WIDERFACE 2017',\n                   'image': 'WIDERFACE'}\n\n    img_full_path = os.path.join(img_root, img_path)\n    if os.path.exists(img_full_path):\n        im = cv2.imread(img_full_path)\n        im_shape = im.shape\n    else:\n        print(f\"Warning: {img_path} does not exist, can't read image shape.\")\n        im_shape = (0, 0, 0)\n\n    ob_list = []\n    for ob in labels:\n        if ob[7] == '1':\n            # invalid face image, skip\n            continue\n\n        if int(ob[2]) <= 0 or int(ob[3]) <= 0:\n            print(f\"Warning: find bbox w or h <= 0, in {img_path}, skip.\")\n            continue\n\n        ob_dict = {'name': 'face',\n                   'truncated': '0' if ob[8] == '0' else '1',\n                   'difficult': '1' if ob[4] == '2' or ob[8] == '2' else '0',\n                   'xmin': ob[0], 'ymin': ob[1],\n                   'xmax': str(int(ob[0]) + int(ob[2])),\n                   'ymax': str(int(ob[1]) + int(ob[3])),\n                   'blur': ob[4], 'expression': ob[5],\n                   'illumination': ob[6], 'invalid': ob[7],\n                   'occlusion': ob[8], 'pose': ob[9]}\n\n        # if ob[7] == '1':\n        #     cv2.rectangle(im, (int(ob_dict['xmin']), int(ob_dict['ymin'])),\n        #                   (int(ob_dict['xmax']), int(ob_dict['ymax'])),\n        #                   (0, 0, 255))\n        #     cv2.imshow(\"s\", im)\n        #     cv2.waitKey(0)\n\n        ob_list.append(ob_dict)\n    \n    if len(ob_list) == 0: \n        print(f\"in {img_path}, no object, skip.\")\n        return False\n\n    create_pascal_voc_xml(filename=img_path,\n                          years=\"WIDERFACE2017\",\n                       
   source_dict=source_dict,\n                          objects_list=ob_list,\n                          im_shape=im_shape,\n                          save_root=save_root)\n\n    return True\n\n\ndef parse_wider_txt(data_root: str, split: str, save_root: str):\n    \"\"\"\n    Parse the WIDER FACE bounding-box ground-truth file of one split and generate Pascal VOC style xml annotations.\n    refer to: torchvision/datasets/widerface.py\n    :param data_root: dataset root directory, expected to contain 'wider_face_split' and 'WIDER_{split}/images'\n    :param split: dataset split to parse, 'train' or 'val'\n    :param save_root: directory the xml files are saved to (created if it does not exist)\n    :return: None\n    \"\"\"\n    assert split in ['train', 'val'], f\"split must be in ['train', 'val'], got {split}\"\n\n    if os.path.exists(save_root) is False:\n        os.makedirs(save_root)\n\n    txt_path = os.path.join(data_root, 'wider_face_split', f'wider_face_{split}_bbx_gt.txt')\n    img_root = os.path.join(data_root, f'WIDER_{split}', 'images')\n    with open(txt_path, \"r\") as f:\n        lines = f.readlines()\n        file_name_line, num_boxes_line, box_annotation_line = True, False, False\n        num_boxes, box_counter, idx = 0, 0, 0\n        labels = []\n        xml_list = []\n        progress_bar = tqdm(lines)\n        for line in progress_bar:\n            line = line.rstrip()\n            if file_name_line:\n                img_path = line\n                file_name_line = False\n                num_boxes_line = True\n            elif num_boxes_line:\n                num_boxes = int(line)\n                num_boxes_line = False\n                box_annotation_line = True\n            elif box_annotation_line:\n                box_counter += 1\n                line_split = line.split(\" \")\n                line_values = [x for x in line_split]\n                labels.append(line_values)\n                if box_counter >= num_boxes:\n                    box_annotation_line = False\n                    file_name_line = True\n\n                    if num_boxes == 0:\n                        print(f\"in {img_path}, no object, skip.\")\n                    else:\n                        if create_xml(labels, img_root, img_path, save_root):\n                            # 
只记录有目标的xml文件\n                            xml_list.append(img_path.split(\"/\")[-1].split(\".\")[0])\n\n                    box_counter = 0\n                    labels.clear()\n                    idx += 1\n                    progress_bar.set_description(f\"{idx} images\")\n            else:\n                raise RuntimeError(\"Error parsing annotation file {}\".format(txt_path))\n\n        with open(split+'.txt', 'w') as w:\n            w.write(\"\\n\".join(xml_list))\n\n\nparse_wider_txt(\"/data/wider_face/\",\n                \"val\",\n                \"./annotation/\")\n"
  },
  {
    "path": "pytorch_classification/ConfusionMatrix/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/ConfusionMatrix/main.py",
    "content": "import os\nimport json\n\nimport torch\nfrom torchvision import transforms, datasets\nimport numpy as np\nfrom tqdm import tqdm\nimport matplotlib.pyplot as plt\nfrom prettytable import PrettyTable\n\nfrom model import MobileNetV2\n\n\nclass ConfusionMatrix(object):\n    \"\"\"\n    注意，如果显示的图像不全，是matplotlib版本问题\n    本例程使用matplotlib-3.2.1(windows and ubuntu)绘制正常\n    需要额外安装prettytable库\n    \"\"\"\n    def __init__(self, num_classes: int, labels: list):\n        self.matrix = np.zeros((num_classes, num_classes))\n        self.num_classes = num_classes\n        self.labels = labels\n\n    def update(self, preds, labels):\n        for p, t in zip(preds, labels):\n            self.matrix[p, t] += 1\n\n    def summary(self):\n        # calculate accuracy\n        sum_TP = 0\n        for i in range(self.num_classes):\n            sum_TP += self.matrix[i, i]\n        acc = sum_TP / np.sum(self.matrix)\n        print(\"the model accuracy is \", acc)\n\n        # precision, recall, specificity\n        table = PrettyTable()\n        table.field_names = [\"\", \"Precision\", \"Recall\", \"Specificity\"]\n        for i in range(self.num_classes):\n            TP = self.matrix[i, i]\n            FP = np.sum(self.matrix[i, :]) - TP\n            FN = np.sum(self.matrix[:, i]) - TP\n            TN = np.sum(self.matrix) - TP - FP - FN\n            Precision = round(TP / (TP + FP), 3) if TP + FP != 0 else 0.\n            Recall = round(TP / (TP + FN), 3) if TP + FN != 0 else 0.\n            Specificity = round(TN / (TN + FP), 3) if TN + FP != 0 else 0.\n            table.add_row([self.labels[i], Precision, Recall, Specificity])\n        print(table)\n\n    def plot(self):\n        matrix = self.matrix\n        print(matrix)\n        plt.imshow(matrix, cmap=plt.cm.Blues)\n\n        # 设置x轴坐标label\n        plt.xticks(range(self.num_classes), self.labels, rotation=45)\n        # 设置y轴坐标label\n        plt.yticks(range(self.num_classes), self.labels)\n        # 
显示colorbar\n        plt.colorbar()\n        plt.xlabel('True Labels')\n        plt.ylabel('Predicted Labels')\n        plt.title('Confusion matrix')\n\n        # 在图中标注数量/概率信息\n        thresh = matrix.max() / 2\n        for x in range(self.num_classes):\n            for y in range(self.num_classes):\n                # 注意这里的matrix[y, x]不是matrix[x, y]\n                info = int(matrix[y, x])\n                plt.text(x, y, info,\n                         verticalalignment='center',\n                         horizontalalignment='center',\n                         color=\"white\" if info > thresh else \"black\")\n        plt.tight_layout()\n        plt.show()\n\n\nif __name__ == '__main__':\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(device)\n\n    data_transform = transforms.Compose([transforms.Resize(256),\n                                         transforms.CenterCrop(224),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"data path {} does not exist.\".format(image_path)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform)\n\n    batch_size = 16\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=2)\n    net = MobileNetV2(num_classes=5)\n    # load pretrain weights\n    model_weight_path = \"./MobileNetV2.pth\"\n    assert os.path.exists(model_weight_path), \"cannot find {} 
file\".format(model_weight_path)\n    net.load_state_dict(torch.load(model_weight_path, map_location=device))\n    net.to(device)\n\n    # read class_indict\n    json_label_path = './class_indices.json'\n    assert os.path.exists(json_label_path), \"cannot find {} file\".format(json_label_path)\n    json_file = open(json_label_path, 'r')\n    class_indict = json.load(json_file)\n\n    labels = [label for _, label in class_indict.items()]\n    confusion = ConfusionMatrix(num_classes=5, labels=labels)\n    net.eval()\n    with torch.no_grad():\n        for val_data in tqdm(validate_loader):\n            val_images, val_labels = val_data\n            outputs = net(val_images.to(device))\n            outputs = torch.softmax(outputs, dim=1)\n            outputs = torch.argmax(outputs, dim=1)\n            confusion.update(outputs.to(\"cpu\").numpy(), val_labels.to(\"cpu\").numpy())\n    confusion.plot()\n    confusion.summary()\n\n"
  },
  {
    "path": "pytorch_classification/ConfusionMatrix/model.py",
    "content": "from torch import nn\nimport torch\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(nn.Sequential):\n    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):\n        padding = (kernel_size - 1) // 2\n        super(ConvBNReLU, self).__init__(\n            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),\n            nn.BatchNorm2d(out_channel),\n            nn.ReLU6(inplace=True)\n        )\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio):\n        super(InvertedResidual, self).__init__()\n        hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n\n        layers = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1))\n        layers.extend([\n            # 3x3 depthwise conv\n            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),\n            # 1x1 pointwise conv(linear)\n            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),\n            nn.BatchNorm2d(out_channel),\n        ])\n\n        self.conv = nn.Sequential(*layers)\n\n    def forward(self, x):\n        if self.use_shortcut:\n            return x + self.conv(x)\n        else:\n         
   return self.conv(x)\n\n\nclass MobileNetV2(nn.Module):\n    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):\n        super(MobileNetV2, self).__init__()\n        block = InvertedResidual\n        input_channel = _make_divisible(32 * alpha, round_nearest)\n        last_channel = _make_divisible(1280 * alpha, round_nearest)\n\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n\n        features = []\n        # conv1 layer\n        features.append(ConvBNReLU(3, input_channel, stride=2))\n        # building inverted residual residual blockes\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = _make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(block(input_channel, output_channel, stride, expand_ratio=t))\n                input_channel = output_channel\n        # building last several layers\n        features.append(ConvBNReLU(input_channel, last_channel, 1))\n        # combine feature layers\n        self.features = nn.Sequential(*features)\n\n        # building classifier\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.classifier = nn.Sequential(\n            nn.Dropout(0.2),\n            nn.Linear(last_channel, num_classes)\n        )\n\n        # weight initialization\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out')\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                
nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "pytorch_classification/ConvNeXt/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/ConvNeXt/model.py",
    "content": "\"\"\"\noriginal code from facebook research:\nhttps://github.com/facebookresearch/ConvNeXt\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n\n    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,\n    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...\n    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for\n    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use\n    'survival rate' as the argument.\n\n    \"\"\"\n    if drop_prob == 0. or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass LayerNorm(nn.Module):\n    r\"\"\" LayerNorm that supports two data formats: channels_last (default) or channels_first.\n    The ordering of the dimensions in the inputs. 
channels_last corresponds to inputs with\n    shape (batch_size, height, width, channels) while channels_first corresponds to inputs\n    with shape (batch_size, channels, height, width).\n    \"\"\"\n\n    def __init__(self, normalized_shape, eps=1e-6, data_format=\"channels_last\"):\n        super().__init__()\n        self.weight = nn.Parameter(torch.ones(normalized_shape), requires_grad=True)\n        self.bias = nn.Parameter(torch.zeros(normalized_shape), requires_grad=True)\n        self.eps = eps\n        self.data_format = data_format\n        if self.data_format not in [\"channels_last\", \"channels_first\"]:\n            raise ValueError(f\"not support data format '{self.data_format}'\")\n        self.normalized_shape = (normalized_shape,)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        if self.data_format == \"channels_last\":\n            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)\n        elif self.data_format == \"channels_first\":\n            # [batch_size, channels, height, width]\n            mean = x.mean(1, keepdim=True)\n            var = (x - mean).pow(2).mean(1, keepdim=True)\n            x = (x - mean) / torch.sqrt(var + self.eps)\n            x = self.weight[:, None, None] * x + self.bias[:, None, None]\n            return x\n\n\nclass Block(nn.Module):\n    r\"\"\" ConvNeXt Block. There are two equivalent implementations:\n    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)\n    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back\n    We use (2) as we find it slightly faster in PyTorch\n\n    Args:\n        dim (int): Number of input channels.\n        drop_rate (float): Stochastic depth rate. Default: 0.0\n        layer_scale_init_value (float): Init value for Layer Scale. 
Default: 1e-6.\n    \"\"\"\n    def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6):\n        super().__init__()\n        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv\n        self.norm = LayerNorm(dim, eps=1e-6, data_format=\"channels_last\")\n        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers\n        self.act = nn.GELU()\n        self.pwconv2 = nn.Linear(4 * dim, dim)\n        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim,)),\n                                  requires_grad=True) if layer_scale_init_value > 0 else None\n        self.drop_path = DropPath(drop_rate) if drop_rate > 0. else nn.Identity()\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        shortcut = x\n        x = self.dwconv(x)\n        x = x.permute(0, 2, 3, 1)  # [N, C, H, W] -> [N, H, W, C]\n        x = self.norm(x)\n        x = self.pwconv1(x)\n        x = self.act(x)\n        x = self.pwconv2(x)\n        if self.gamma is not None:\n            x = self.gamma * x\n        x = x.permute(0, 3, 1, 2)  # [N, H, W, C] -> [N, C, H, W]\n\n        x = shortcut + self.drop_path(x)\n        return x\n\n\nclass ConvNeXt(nn.Module):\n    r\"\"\" ConvNeXt\n        A PyTorch impl of : `A ConvNet for the 2020s`  -\n          https://arxiv.org/pdf/2201.03545.pdf\n    Args:\n        in_chans (int): Number of input image channels. Default: 3\n        num_classes (int): Number of classes for classification head. Default: 1000\n        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]\n        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]\n        drop_path_rate (float): Stochastic depth rate. Default: 0.\n        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.\n        head_init_scale (float): Init scaling value for classifier weights and biases. 
Default: 1.\n    \"\"\"\n    def __init__(self, in_chans: int = 3, num_classes: int = 1000, depths: list = None,\n                 dims: list = None, drop_path_rate: float = 0., layer_scale_init_value: float = 1e-6,\n                 head_init_scale: float = 1.):\n        super().__init__()\n        self.downsample_layers = nn.ModuleList()  # stem and 3 intermediate downsampling conv layers\n        stem = nn.Sequential(nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),\n                             LayerNorm(dims[0], eps=1e-6, data_format=\"channels_first\"))\n        self.downsample_layers.append(stem)\n\n        # 对应stage2-stage4前的3个downsample\n        for i in range(3):\n            downsample_layer = nn.Sequential(LayerNorm(dims[i], eps=1e-6, data_format=\"channels_first\"),\n                                             nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2))\n            self.downsample_layers.append(downsample_layer)\n\n        self.stages = nn.ModuleList()  # 4 feature resolution stages, each consisting of multiple blocks\n        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]\n        cur = 0\n        # 构建每个stage中堆叠的block\n        for i in range(4):\n            stage = nn.Sequential(\n                *[Block(dim=dims[i], drop_rate=dp_rates[cur + j], layer_scale_init_value=layer_scale_init_value)\n                  for j in range(depths[i])]\n            )\n            self.stages.append(stage)\n            cur += depths[i]\n\n        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer\n        self.head = nn.Linear(dims[-1], num_classes)\n        self.apply(self._init_weights)\n        self.head.weight.data.mul_(head_init_scale)\n        self.head.bias.data.mul_(head_init_scale)\n\n    def _init_weights(self, m):\n        if isinstance(m, (nn.Conv2d, nn.Linear)):\n            nn.init.trunc_normal_(m.weight, std=0.02)\n            nn.init.constant_(m.bias, 0)\n\n    def forward_features(self, 
x: torch.Tensor) -> torch.Tensor:\n        for i in range(4):\n            x = self.downsample_layers[i](x)\n            x = self.stages[i](x)\n\n        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        x = self.forward_features(x)\n        x = self.head(x)\n        return x\n\n\ndef convnext_tiny(num_classes: int):\n    # https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth\n    model = ConvNeXt(depths=[3, 3, 9, 3],\n                     dims=[96, 192, 384, 768],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_small(num_classes: int):\n    # https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[96, 192, 384, 768],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_base(num_classes: int):\n    # https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth\n    # https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[128, 256, 512, 1024],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_large(num_classes: int):\n    # https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth\n    # https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[192, 384, 768, 1536],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_xlarge(num_classes: int):\n    # https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[256, 512, 1024, 2048],\n                     num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "pytorch_classification/ConvNeXt/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/ConvNeXt/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import convnext_tiny as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(f\"using {device} device.\")\n\n    num_classes = 5\n    img_size = 224\n    data_transform = transforms.Compose(\n        [transforms.Resize(int(img_size * 1.14)),\n         transforms.CenterCrop(img_size),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes).to(device)\n    # load model weights\n    model_weight_path = \"./weights/best_model.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  
predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/ConvNeXt/train.py",
    "content": "import os\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom my_dataset import MyDataSet\nfrom model import convnext_tiny as create_model\nfrom utils import read_split_data, create_lr_scheduler, get_params_groups, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(f\"using {device} device.\")\n\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    tb_writer = SummaryWriter()\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = 224\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(img_size),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(int(img_size * 1.143)),\n                                   transforms.CenterCrop(img_size),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader 
workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes).to(device)\n\n    if args.weights != \"\":\n        assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n        weights_dict = torch.load(args.weights, map_location=device)[\"model\"]\n        # 删除有关分类类别的权重\n        for k in list(weights_dict.keys()):\n            if \"head\" in k:\n                del weights_dict[k]\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除head外，其他权重全部冻结\n            if \"head\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    # pg = [p for p in model.parameters() if p.requires_grad]\n    pg = get_params_groups(model, weight_decay=args.wd)\n    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=args.wd)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs,\n                                       warmup=True, warmup_epochs=1)\n\n    best_acc = 0.\n    for epoch in range(args.epochs):\n   
     # train\n        train_loss, train_acc = train_one_epoch(model=model,\n                                                optimizer=optimizer,\n                                                data_loader=train_loader,\n                                                device=device,\n                                                epoch=epoch,\n                                                lr_scheduler=lr_scheduler)\n\n        # validate\n        val_loss, val_acc = evaluate(model=model,\n                                     data_loader=val_loader,\n                                     device=device,\n                                     epoch=epoch)\n\n        tags = [\"train_loss\", \"train_acc\", \"val_loss\", \"val_acc\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], train_loss, epoch)\n        tb_writer.add_scalar(tags[1], train_acc, epoch)\n        tb_writer.add_scalar(tags[2], val_loss, epoch)\n        tb_writer.add_scalar(tags[3], val_acc, epoch)\n        tb_writer.add_scalar(tags[4], optimizer.param_groups[0][\"lr\"], epoch)\n\n        if best_acc < val_acc:\n            torch.save(model.state_dict(), \"./weights/best_model.pth\")\n            best_acc = val_acc\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=10)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=5e-4)\n    parser.add_argument('--wd', type=float, default=5e-2)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 预训练权重路径，如果不想载入就设置为空字符\n    # 链接: https://pan.baidu.com/s/1aNqQW4n_RrUlWUBNlaJRHA  密码: i83t\n    parser.add_argument('--weights', type=str, default='./convnext_tiny_1k_224_ema.pth',\n          
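              help='initial weights path')\n    # 是否冻结head以外所有权重\n    # argparse's type=bool treats any non-empty string (even 'False') as True, so parse the text explicitly\n    parser.add_argument('--freeze-layers', type=lambda s: s.lower() in ('1', 'true', 'yes'), default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"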
  },
  {
    "path": "pytorch_classification/ConvNeXt/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\nimport math\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}, lr: {:.5f}\".format(\n            epoch,\n            accu_loss.item() / (step + 1),\n            accu_num.item() / sample_num,\n            optimizer.param_groups[0][\"lr\"]\n        )\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n        # update lr\n        lr_scheduler.step()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = 
model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(\n            epoch,\n            accu_loss.item() / (step + 1),\n            accu_num.item() / sample_num\n        )\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3,\n                        end_factor=1e-6):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            current_step = (x - warmup_epochs * num_step)\n            cosine_steps = (epochs - warmup_epochs) * num_step\n            # warmup后lr倍率因子从1 -> end_factor\n            return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef get_params_groups(model: torch.nn.Module, weight_decay: float = 1e-5):\n    # 记录optimize要训练的权重参数\n    parameter_group_vars = {\"decay\": {\"params\": [], \"weight_decay\": weight_decay},\n                            \"no_decay\": {\"params\": [], \"weight_decay\": 0.}}\n\n    # 记录对应的权重名称\n    parameter_group_names = {\"decay\": {\"params\": [], \"weight_decay\": 
weight_decay},\n                             \"no_decay\": {\"params\": [], \"weight_decay\": 0.}}\n\n    for name, param in model.named_parameters():\n        if not param.requires_grad:\n            continue  # frozen weights\n\n        if len(param.shape) == 1 or name.endswith(\".bias\"):\n            group_name = \"no_decay\"\n        else:\n            group_name = \"decay\"\n\n        parameter_group_vars[group_name][\"params\"].append(param)\n        parameter_group_names[group_name][\"params\"].append(name)\n\n    print(\"Param groups = %s\" % json.dumps(parameter_group_names, indent=2))\n    return list(parameter_group_vars.values())\n"
  },
  {
    "path": "pytorch_classification/MobileViT/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/MobileViT/model.py",
    "content": "\"\"\"\noriginal code from apple:\nhttps://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py\n\"\"\"\n\nfrom typing import Optional, Tuple, Union, Dict\nimport math\nimport torch\nimport torch.nn as nn\nfrom torch import Tensor\nfrom torch.nn import functional as F\n\nfrom transformer import TransformerEncoder\nfrom model_config import get_config\n\n\ndef make_divisible(\n    v: Union[float, int],\n    divisor: Optional[int] = 8,\n    min_value: Optional[Union[float, int]] = None,\n) -> Union[float, int]:\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    :param v:\n    :param divisor:\n    :param min_value:\n    :return:\n    \"\"\"\n    if min_value is None:\n        min_value = divisor\n    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_v < 0.9 * v:\n        new_v += divisor\n    return new_v\n\n\nclass ConvLayer(nn.Module):\n    \"\"\"\n    Applies a 2D convolution over an input\n\n    Args:\n        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`\n        out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out})`\n        kernel_size (Union[int, Tuple[int, int]]): Kernel size for convolution.\n        stride (Union[int, Tuple[int, int]]): Stride for convolution. Default: 1\n        groups (Optional[int]): Number of groups in convolution. Default: 1\n        bias (Optional[bool]): Use bias. Default: ``False``\n        use_norm (Optional[bool]): Use normalization layer after convolution. 
Default: ``True``\n        use_act (Optional[bool]): Use activation layer after convolution (or convolution and normalization).\n                                Default: ``True``\n\n    Shape:\n        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`\n        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`\n\n    .. note::\n        For depth-wise convolution, `groups=C_{in}=C_{out}`.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        out_channels: int,\n        kernel_size: Union[int, Tuple[int, int]],\n        stride: Optional[Union[int, Tuple[int, int]]] = 1,\n        groups: Optional[int] = 1,\n        bias: Optional[bool] = False,\n        use_norm: Optional[bool] = True,\n        use_act: Optional[bool] = True,\n    ) -> None:\n        super().__init__()\n\n        if isinstance(kernel_size, int):\n            kernel_size = (kernel_size, kernel_size)\n\n        if isinstance(stride, int):\n            stride = (stride, stride)\n\n        assert isinstance(kernel_size, Tuple)\n        assert isinstance(stride, Tuple)\n\n        padding = (\n            int((kernel_size[0] - 1) / 2),\n            int((kernel_size[1] - 1) / 2),\n        )\n\n        block = nn.Sequential()\n\n        conv_layer = nn.Conv2d(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            kernel_size=kernel_size,\n            stride=stride,\n            groups=groups,\n            padding=padding,\n            bias=bias\n        )\n\n        block.add_module(name=\"conv\", module=conv_layer)\n\n        if use_norm:\n            norm_layer = nn.BatchNorm2d(num_features=out_channels, momentum=0.1)\n            block.add_module(name=\"norm\", module=norm_layer)\n\n        if use_act:\n            act_layer = nn.SiLU()\n            block.add_module(name=\"act\", module=act_layer)\n\n        self.block = block\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self.block(x)\n\n\nclass InvertedResidual(nn.Module):\n    
\"\"\"\n    This class implements the inverted residual block, as described in `MobileNetv2 <https://arxiv.org/abs/1801.04381>`_ paper\n\n    Args:\n        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H_{in}, W_{in})`\n        out_channels (int): :math:`C_{out}` from an expected output of size :math:`(N, C_{out}, H_{out}, W_{out)`\n        stride (int): Use convolutions with a stride. Default: 1\n        expand_ratio (Union[int, float]): Expand the input channels by this factor in depth-wise conv\n        skip_connection (Optional[bool]): Use skip-connection. Default: True\n\n    Shape:\n        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`\n        - Output: :math:`(N, C_{out}, H_{out}, W_{out})`\n\n    .. note::\n        If `in_channels =! out_channels` and `stride > 1`, we set `skip_connection=False`\n\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        out_channels: int,\n        stride: int,\n        expand_ratio: Union[int, float],\n        skip_connection: Optional[bool] = True,\n    ) -> None:\n        assert stride in [1, 2]\n        hidden_dim = make_divisible(int(round(in_channels * expand_ratio)), 8)\n\n        super().__init__()\n\n        block = nn.Sequential()\n        if expand_ratio != 1:\n            block.add_module(\n                name=\"exp_1x1\",\n                module=ConvLayer(\n                    in_channels=in_channels,\n                    out_channels=hidden_dim,\n                    kernel_size=1\n                ),\n            )\n\n        block.add_module(\n            name=\"conv_3x3\",\n            module=ConvLayer(\n                in_channels=hidden_dim,\n                out_channels=hidden_dim,\n                stride=stride,\n                kernel_size=3,\n                groups=hidden_dim\n            ),\n        )\n\n        block.add_module(\n            name=\"red_1x1\",\n            module=ConvLayer(\n                in_channels=hidden_dim,\n      
          out_channels=out_channels,\n                kernel_size=1,\n                use_act=False,\n                use_norm=True,\n            ),\n        )\n\n        self.block = block\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.exp = expand_ratio\n        self.stride = stride\n        self.use_res_connect = (\n            self.stride == 1 and in_channels == out_channels and skip_connection\n        )\n\n    def forward(self, x: Tensor, *args, **kwargs) -> Tensor:\n        if self.use_res_connect:\n            return x + self.block(x)\n        else:\n            return self.block(x)\n\n\nclass MobileViTBlock(nn.Module):\n    \"\"\"\n    This class defines the `MobileViT block <https://arxiv.org/abs/2110.02178?context=cs.LG>`_\n\n    Args:\n        opts: command line arguments\n        in_channels (int): :math:`C_{in}` from an expected input of size :math:`(N, C_{in}, H, W)`\n        transformer_dim (int): Input dimension to the transformer unit\n        ffn_dim (int): Dimension of the FFN block\n        n_transformer_blocks (int): Number of transformer blocks. Default: 2\n        head_dim (int): Head dimension in the multi-head attention. Default: 32\n        attn_dropout (float): Dropout in multi-head attention. Default: 0.0\n        dropout (float): Dropout rate. Default: 0.0\n        ffn_dropout (float): Dropout between FFN layers in transformer. Default: 0.0\n        patch_h (int): Patch height for unfolding operation. Default: 8\n        patch_w (int): Patch width for unfolding operation. Default: 8\n        transformer_norm_layer (Optional[str]): Normalization layer in the transformer block. Default: layer_norm\n        conv_ksize (int): Kernel size to learn local representations in MobileViT block. Default: 3\n        no_fusion (Optional[bool]): Do not combine the input and output feature maps. 
Default: False\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels: int,\n        transformer_dim: int,\n        ffn_dim: int,\n        n_transformer_blocks: int = 2,\n        head_dim: int = 32,\n        attn_dropout: float = 0.0,\n        dropout: float = 0.0,\n        ffn_dropout: float = 0.0,\n        patch_h: int = 8,\n        patch_w: int = 8,\n        conv_ksize: Optional[int] = 3,\n        *args,\n        **kwargs\n    ) -> None:\n        super().__init__()\n\n        conv_3x3_in = ConvLayer(\n            in_channels=in_channels,\n            out_channels=in_channels,\n            kernel_size=conv_ksize,\n            stride=1\n        )\n        conv_1x1_in = ConvLayer(\n            in_channels=in_channels,\n            out_channels=transformer_dim,\n            kernel_size=1,\n            stride=1,\n            use_norm=False,\n            use_act=False\n        )\n\n        conv_1x1_out = ConvLayer(\n            in_channels=transformer_dim,\n            out_channels=in_channels,\n            kernel_size=1,\n            stride=1\n        )\n        conv_3x3_out = ConvLayer(\n            in_channels=2 * in_channels,\n            out_channels=in_channels,\n            kernel_size=conv_ksize,\n            stride=1\n        )\n\n        self.local_rep = nn.Sequential()\n        self.local_rep.add_module(name=\"conv_3x3\", module=conv_3x3_in)\n        self.local_rep.add_module(name=\"conv_1x1\", module=conv_1x1_in)\n\n        assert transformer_dim % head_dim == 0\n        num_heads = transformer_dim // head_dim\n\n        global_rep = [\n            TransformerEncoder(\n                embed_dim=transformer_dim,\n                ffn_latent_dim=ffn_dim,\n                num_heads=num_heads,\n                attn_dropout=attn_dropout,\n                dropout=dropout,\n                ffn_dropout=ffn_dropout\n            )\n            for _ in range(n_transformer_blocks)\n        ]\n        global_rep.append(nn.LayerNorm(transformer_dim))\n     
   self.global_rep = nn.Sequential(*global_rep)\n\n        self.conv_proj = conv_1x1_out\n        self.fusion = conv_3x3_out\n\n        self.patch_h = patch_h\n        self.patch_w = patch_w\n        self.patch_area = self.patch_w * self.patch_h\n\n        self.cnn_in_dim = in_channels\n        self.cnn_out_dim = transformer_dim\n        self.n_heads = num_heads\n        self.ffn_dim = ffn_dim\n        self.dropout = dropout\n        self.attn_dropout = attn_dropout\n        self.ffn_dropout = ffn_dropout\n        self.n_blocks = n_transformer_blocks\n        self.conv_ksize = conv_ksize\n\n    def unfolding(self, x: Tensor) -> Tuple[Tensor, Dict]:\n        patch_w, patch_h = self.patch_w, self.patch_h\n        patch_area = patch_w * patch_h\n        batch_size, in_channels, orig_h, orig_w = x.shape\n\n        new_h = int(math.ceil(orig_h / self.patch_h) * self.patch_h)\n        new_w = int(math.ceil(orig_w / self.patch_w) * self.patch_w)\n\n        interpolate = False\n        if new_w != orig_w or new_h != orig_h:\n            # Note: Padding can be done, but then it needs to be handled in attention function.\n            x = F.interpolate(x, size=(new_h, new_w), mode=\"bilinear\", align_corners=False)\n            interpolate = True\n\n        # number of patches along width and height\n        num_patch_w = new_w // patch_w  # n_w\n        num_patch_h = new_h // patch_h  # n_h\n        num_patches = num_patch_h * num_patch_w  # N\n\n        # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]\n        x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)\n        # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w]\n        x = x.transpose(1, 2)\n        # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w\n        x = x.reshape(batch_size, in_channels, num_patches, patch_area)\n        # [B, C, N, P] -> [B, P, N, C]\n        x = x.transpose(1, 3)\n        # [B, P, N, C] -> [BP, N, C]\n        x 
= x.reshape(batch_size * patch_area, num_patches, -1)\n\n        info_dict = {\n            \"orig_size\": (orig_h, orig_w),\n            \"batch_size\": batch_size,\n            \"interpolate\": interpolate,\n            \"total_patches\": num_patches,\n            \"num_patches_w\": num_patch_w,\n            \"num_patches_h\": num_patch_h,\n        }\n\n        return x, info_dict\n\n    def folding(self, x: Tensor, info_dict: Dict) -> Tensor:\n        n_dim = x.dim()\n        assert n_dim == 3, \"Tensor should be of shape BPxNxC. Got: {}\".format(\n            x.shape\n        )\n        # [BP, N, C] --> [B, P, N, C]\n        x = x.contiguous().view(\n            info_dict[\"batch_size\"], self.patch_area, info_dict[\"total_patches\"], -1\n        )\n\n        batch_size, pixels, num_patches, channels = x.size()\n        num_patch_h = info_dict[\"num_patches_h\"]\n        num_patch_w = info_dict[\"num_patches_w\"]\n\n        # [B, P, N, C] -> [B, C, N, P]\n        x = x.transpose(1, 3)\n        # [B, C, N, P] -> [B*C*n_h, n_w, p_h, p_w]\n        x = x.reshape(batch_size * channels * num_patch_h, num_patch_w, self.patch_h, self.patch_w)\n        # [B*C*n_h, n_w, p_h, p_w] -> [B*C*n_h, p_h, n_w, p_w]\n        x = x.transpose(1, 2)\n        # [B*C*n_h, p_h, n_w, p_w] -> [B, C, H, W]\n        x = x.reshape(batch_size, channels, num_patch_h * self.patch_h, num_patch_w * self.patch_w)\n        if info_dict[\"interpolate\"]:\n            x = F.interpolate(\n                x,\n                size=info_dict[\"orig_size\"],\n                mode=\"bilinear\",\n                align_corners=False,\n            )\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        res = x\n\n        fm = self.local_rep(x)\n\n        # convert feature map to patches\n        patches, info_dict = self.unfolding(fm)\n\n        # learn global representations\n        for transformer_layer in self.global_rep:\n            patches = transformer_layer(patches)\n\n        # 
[BP, N, C] -> [B, C, H, W]\n        fm = self.folding(x=patches, info_dict=info_dict)\n\n        fm = self.conv_proj(fm)\n\n        fm = self.fusion(torch.cat((res, fm), dim=1))\n        return fm\n\n\nclass MobileViT(nn.Module):\n    \"\"\"\n    This class implements the `MobileViT architecture <https://arxiv.org/abs/2110.02178?context=cs.LG>`_\n    \"\"\"\n    def __init__(self, model_cfg: Dict, num_classes: int = 1000):\n        super().__init__()\n\n        image_channels = 3\n        out_channels = 16\n\n        self.conv_1 = ConvLayer(\n            in_channels=image_channels,\n            out_channels=out_channels,\n            kernel_size=3,\n            stride=2\n        )\n\n        self.layer_1, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg[\"layer1\"])\n        self.layer_2, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg[\"layer2\"])\n        self.layer_3, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg[\"layer3\"])\n        self.layer_4, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg[\"layer4\"])\n        self.layer_5, out_channels = self._make_layer(input_channel=out_channels, cfg=model_cfg[\"layer5\"])\n\n        exp_channels = min(model_cfg[\"last_layer_exp_factor\"] * out_channels, 960)\n        self.conv_1x1_exp = ConvLayer(\n            in_channels=out_channels,\n            out_channels=exp_channels,\n            kernel_size=1\n        )\n\n        self.classifier = nn.Sequential()\n        self.classifier.add_module(name=\"global_pool\", module=nn.AdaptiveAvgPool2d(1))\n        self.classifier.add_module(name=\"flatten\", module=nn.Flatten())\n        if 0.0 < model_cfg[\"cls_dropout\"] < 1.0:\n            self.classifier.add_module(name=\"dropout\", module=nn.Dropout(p=model_cfg[\"cls_dropout\"]))\n        self.classifier.add_module(name=\"fc\", module=nn.Linear(in_features=exp_channels, 
out_features=num_classes))\n\n        # weight init\n        self.apply(self.init_parameters)\n\n    def _make_layer(self, input_channel, cfg: Dict) -> Tuple[nn.Sequential, int]:\n        block_type = cfg.get(\"block_type\", \"mobilevit\")\n        if block_type.lower() == \"mobilevit\":\n            return self._make_mit_layer(input_channel=input_channel, cfg=cfg)\n        else:\n            return self._make_mobilenet_layer(input_channel=input_channel, cfg=cfg)\n\n    @staticmethod\n    def _make_mobilenet_layer(input_channel: int, cfg: Dict) -> Tuple[nn.Sequential, int]:\n        output_channels = cfg.get(\"out_channels\")\n        num_blocks = cfg.get(\"num_blocks\", 2)\n        expand_ratio = cfg.get(\"expand_ratio\", 4)\n        block = []\n\n        for i in range(num_blocks):\n            stride = cfg.get(\"stride\", 1) if i == 0 else 1\n\n            layer = InvertedResidual(\n                in_channels=input_channel,\n                out_channels=output_channels,\n                stride=stride,\n                expand_ratio=expand_ratio\n            )\n            block.append(layer)\n            input_channel = output_channels\n\n        return nn.Sequential(*block), input_channel\n\n    @staticmethod\n    def _make_mit_layer(input_channel: int, cfg: Dict) -> [nn.Sequential, int]:\n        stride = cfg.get(\"stride\", 1)\n        block = []\n\n        if stride == 2:\n            layer = InvertedResidual(\n                in_channels=input_channel,\n                out_channels=cfg.get(\"out_channels\"),\n                stride=stride,\n                expand_ratio=cfg.get(\"mv_expand_ratio\", 4)\n            )\n\n            block.append(layer)\n            input_channel = cfg.get(\"out_channels\")\n\n        transformer_dim = cfg[\"transformer_channels\"]\n        ffn_dim = cfg.get(\"ffn_dim\")\n        num_heads = cfg.get(\"num_heads\", 4)\n        head_dim = transformer_dim // num_heads\n\n        if transformer_dim % head_dim != 0:\n            
raise ValueError(\"Transformer input dimension should be divisible by head dimension. \"\n                             \"Got {} and {}.\".format(transformer_dim, head_dim))\n\n        block.append(MobileViTBlock(\n            in_channels=input_channel,\n            transformer_dim=transformer_dim,\n            ffn_dim=ffn_dim,\n            n_transformer_blocks=cfg.get(\"transformer_blocks\", 1),\n            patch_h=cfg.get(\"patch_h\", 2),\n            patch_w=cfg.get(\"patch_w\", 2),\n            dropout=cfg.get(\"dropout\", 0.1),\n            ffn_dropout=cfg.get(\"ffn_dropout\", 0.0),\n            attn_dropout=cfg.get(\"attn_dropout\", 0.1),\n            head_dim=head_dim,\n            conv_ksize=3\n        ))\n\n        return nn.Sequential(*block), input_channel\n\n    @staticmethod\n    def init_parameters(m):\n        if isinstance(m, nn.Conv2d):\n            if m.weight is not None:\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n            if m.bias is not None:\n                nn.init.zeros_(m.bias)\n        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):\n            if m.weight is not None:\n                nn.init.ones_(m.weight)\n            if m.bias is not None:\n                nn.init.zeros_(m.bias)\n        elif isinstance(m, (nn.Linear,)):\n            if m.weight is not None:\n                nn.init.trunc_normal_(m.weight, mean=0.0, std=0.02)\n            if m.bias is not None:\n                nn.init.zeros_(m.bias)\n        else:\n            pass\n\n    def forward(self, x: Tensor) -> Tensor:\n        x = self.conv_1(x)\n        x = self.layer_1(x)\n        x = self.layer_2(x)\n\n        x = self.layer_3(x)\n        x = self.layer_4(x)\n        x = self.layer_5(x)\n        x = self.conv_1x1_exp(x)\n        x = self.classifier(x)\n        return x\n\n\ndef mobile_vit_xx_small(num_classes: int = 1000):\n    # pretrain weight link\n    # 
https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xxs.pt\n    config = get_config(\"xx_small\")\n    m = MobileViT(config, num_classes=num_classes)\n    return m\n\n\ndef mobile_vit_x_small(num_classes: int = 1000):\n    # pretrain weight link\n    # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_xs.pt\n    config = get_config(\"x_small\")\n    m = MobileViT(config, num_classes=num_classes)\n    return m\n\n\ndef mobile_vit_small(num_classes: int = 1000):\n    # pretrain weight link\n    # https://docs-assets.developer.apple.com/ml-research/models/cvnets/classification/mobilevit_s.pt\n    config = get_config(\"small\")\n    m = MobileViT(config, num_classes=num_classes)\n    return m\n"
  },
  {
    "path": "pytorch_classification/MobileViT/model_config.py",
    "content": "def get_config(mode: str = \"xxs\") -> dict:\n    if mode == \"xx_small\":\n        mv2_exp_mult = 2\n        config = {\n            \"layer1\": {\n                \"out_channels\": 16,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 1,\n                \"stride\": 1,\n                \"block_type\": \"mv2\",\n            },\n            \"layer2\": {\n                \"out_channels\": 24,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 3,\n                \"stride\": 2,\n                \"block_type\": \"mv2\",\n            },\n            \"layer3\": {  # 28x28\n                \"out_channels\": 48,\n                \"transformer_channels\": 64,\n                \"ffn_dim\": 128,\n                \"transformer_blocks\": 2,\n                \"patch_h\": 2,  # 8,\n                \"patch_w\": 2,  # 8,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer4\": {  # 14x14\n                \"out_channels\": 64,\n                \"transformer_channels\": 80,\n                \"ffn_dim\": 160,\n                \"transformer_blocks\": 4,\n                \"patch_h\": 2,  # 4,\n                \"patch_w\": 2,  # 4,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer5\": {  # 7x7\n                \"out_channels\": 80,\n                \"transformer_channels\": 96,\n                \"ffn_dim\": 192,\n                \"transformer_blocks\": 3,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n        
    \"last_layer_exp_factor\": 4,\n            \"cls_dropout\": 0.1\n        }\n    elif mode == \"x_small\":\n        mv2_exp_mult = 4\n        config = {\n            \"layer1\": {\n                \"out_channels\": 32,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 1,\n                \"stride\": 1,\n                \"block_type\": \"mv2\",\n            },\n            \"layer2\": {\n                \"out_channels\": 48,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 3,\n                \"stride\": 2,\n                \"block_type\": \"mv2\",\n            },\n            \"layer3\": {  # 28x28\n                \"out_channels\": 64,\n                \"transformer_channels\": 96,\n                \"ffn_dim\": 192,\n                \"transformer_blocks\": 2,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer4\": {  # 14x14\n                \"out_channels\": 80,\n                \"transformer_channels\": 120,\n                \"ffn_dim\": 240,\n                \"transformer_blocks\": 4,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer5\": {  # 7x7\n                \"out_channels\": 96,\n                \"transformer_channels\": 144,\n                \"ffn_dim\": 288,\n                \"transformer_blocks\": 3,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n           
 \"last_layer_exp_factor\": 4,\n            \"cls_dropout\": 0.1\n        }\n    elif mode == \"small\":\n        mv2_exp_mult = 4\n        config = {\n            \"layer1\": {\n                \"out_channels\": 32,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 1,\n                \"stride\": 1,\n                \"block_type\": \"mv2\",\n            },\n            \"layer2\": {\n                \"out_channels\": 64,\n                \"expand_ratio\": mv2_exp_mult,\n                \"num_blocks\": 3,\n                \"stride\": 2,\n                \"block_type\": \"mv2\",\n            },\n            \"layer3\": {  # 28x28\n                \"out_channels\": 96,\n                \"transformer_channels\": 144,\n                \"ffn_dim\": 288,\n                \"transformer_blocks\": 2,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer4\": {  # 14x14\n                \"out_channels\": 128,\n                \"transformer_channels\": 192,\n                \"ffn_dim\": 384,\n                \"transformer_blocks\": 4,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            \"layer5\": {  # 7x7\n                \"out_channels\": 160,\n                \"transformer_channels\": 240,\n                \"ffn_dim\": 480,\n                \"transformer_blocks\": 3,\n                \"patch_h\": 2,\n                \"patch_w\": 2,\n                \"stride\": 2,\n                \"mv_expand_ratio\": mv2_exp_mult,\n                \"num_heads\": 4,\n                \"block_type\": \"mobilevit\",\n            },\n            
\"last_layer_exp_factor\": 4,\n            \"cls_dropout\": 0.1\n        }\n    else:\n        raise NotImplementedError\n\n    for k in [\"layer1\", \"layer2\", \"layer3\", \"layer4\", \"layer5\"]:\n        config[k].update({\"dropout\": 0.1, \"ffn_dropout\": 0.0, \"attn_dropout\": 0.0})\n\n    return config\n"
  },
  {
    "path": "pytorch_classification/MobileViT/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/MobileViT/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import mobile_vit_xx_small as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    img_size = 224\n    data_transform = transforms.Compose(\n        [transforms.Resize(int(img_size * 1.14)),\n         transforms.CenterCrop(img_size),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/best_model.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == 
'__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/MobileViT/train.py",
    "content": "import os\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom my_dataset import MyDataSet\nfrom model import mobile_vit_xx_small as create_model\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    tb_writer = SummaryWriter()\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = 224\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(img_size),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(int(img_size * 1.143)),\n                                   transforms.CenterCrop(img_size),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = 
torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes).to(device)\n\n    if args.weights != \"\":\n        assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n        weights_dict = torch.load(args.weights, map_location=device)\n        weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n        # 删除有关分类类别的权重\n        for k in list(weights_dict.keys()):\n            if \"classifier\" in k:\n                del weights_dict[k]\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除head外，其他权重全部冻结\n            if \"classifier\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=1E-2)\n\n    best_acc = 0.\n    for epoch in range(args.epochs):\n        # train\n        train_loss, train_acc = train_one_epoch(model=model,\n                                                optimizer=optimizer,\n                            
                    data_loader=train_loader,\n                                                device=device,\n                                                epoch=epoch)\n\n        # validate\n        val_loss, val_acc = evaluate(model=model,\n                                     data_loader=val_loader,\n                                     device=device,\n                                     epoch=epoch)\n\n        tags = [\"train_loss\", \"train_acc\", \"val_loss\", \"val_acc\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], train_loss, epoch)\n        tb_writer.add_scalar(tags[1], train_acc, epoch)\n        tb_writer.add_scalar(tags[2], val_loss, epoch)\n        tb_writer.add_scalar(tags[3], val_acc, epoch)\n        tb_writer.add_scalar(tags[4], optimizer.param_groups[0][\"lr\"], epoch)\n\n        if val_acc > best_acc:\n            best_acc = val_acc\n            torch.save(model.state_dict(), \"./weights/best_model.pth\")\n\n        torch.save(model.state_dict(), \"./weights/latest_model.pth\")\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=10)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=0.0002)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 预训练权重路径，如果不想载入就设置为空字符\n    parser.add_argument('--weights', type=str, default='./mobilevit_xxs.pt',\n                        help='initial weights path')\n    # 是否冻结权重\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/MobileViT/transformer.py",
    "content": "from typing import Optional\n\nimport torch\nimport torch.nn as nn\nfrom torch import Tensor\n\n\nclass MultiHeadAttention(nn.Module):\n    \"\"\"\n    This layer applies a multi-head self- or cross-attention as described in\n    `Attention is all you need <https://arxiv.org/abs/1706.03762>`_ paper\n\n    Args:\n        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`\n        num_heads (int): Number of heads in multi-head attention\n        attn_dropout (float): Attention dropout. Default: 0.0\n        bias (bool): Use bias or not. Default: ``True``\n\n    Shape:\n        - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,\n        and :math:`C_{in}` is input embedding dim\n        - Output: same shape as the input\n\n    \"\"\"\n\n    def __init__(\n        self,\n        embed_dim: int,\n        num_heads: int,\n        attn_dropout: float = 0.0,\n        bias: bool = True,\n        *args,\n        **kwargs\n    ) -> None:\n        super().__init__()\n        if embed_dim % num_heads != 0:\n            raise ValueError(\n                \"Embedding dim must be divisible by number of heads in {}. 
Got: embed_dim={} and num_heads={}\".format(\n                    self.__class__.__name__, embed_dim, num_heads\n                )\n            )\n\n        self.qkv_proj = nn.Linear(in_features=embed_dim, out_features=3 * embed_dim, bias=bias)\n\n        self.attn_dropout = nn.Dropout(p=attn_dropout)\n        self.out_proj = nn.Linear(in_features=embed_dim, out_features=embed_dim, bias=bias)\n\n        self.head_dim = embed_dim // num_heads\n        self.scaling = self.head_dim ** -0.5\n        self.softmax = nn.Softmax(dim=-1)\n        self.num_heads = num_heads\n        self.embed_dim = embed_dim\n\n    def forward(self, x_q: Tensor) -> Tensor:\n        # [N, P, C]\n        b_sz, n_patches, in_channels = x_q.shape\n\n        # self-attention\n        # [N, P, C] -> [N, P, 3C] -> [N, P, 3, h, c] where C = hc\n        qkv = self.qkv_proj(x_q).reshape(b_sz, n_patches, 3, self.num_heads, -1)\n\n        # [N, P, 3, h, c] -> [N, h, 3, P, c]\n        qkv = qkv.transpose(1, 3).contiguous()\n\n        # [N, h, 3, P, c] -> [N, h, P, c] x 3\n        query, key, value = qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2]\n\n        query = query * self.scaling\n\n        # [N, h, P, c] -> [N, h, c, P]\n        key = key.transpose(-1, -2)\n\n        # QK^T\n        # [N, h, P, c] x [N, h, c, P] -> [N, h, P, P]\n        attn = torch.matmul(query, key)\n        attn = self.softmax(attn)\n        attn = self.attn_dropout(attn)\n\n        # weighted sum\n        # [N, h, P, P] x [N, h, P, c] -> [N, h, P, c]\n        out = torch.matmul(attn, value)\n\n        # [N, h, P, c] -> [N, P, h, c] -> [N, P, C]\n        out = out.transpose(1, 2).reshape(b_sz, n_patches, -1)\n        out = self.out_proj(out)\n\n        return out\n\n\nclass TransformerEncoder(nn.Module):\n    \"\"\"\n    This class defines the pre-norm `Transformer encoder <https://arxiv.org/abs/1706.03762>`_\n    Args:\n        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(N, P, C_{in})`\n        
ffn_latent_dim (int): Inner dimension of the FFN\n        num_heads (int) : Number of heads in multi-head attention. Default: 8\n        attn_dropout (float): Dropout rate for attention in multi-head attention. Default: 0.0\n        dropout (float): Dropout rate. Default: 0.0\n        ffn_dropout (float): Dropout between FFN layers. Default: 0.0\n\n    Shape:\n        - Input: :math:`(N, P, C_{in})` where :math:`N` is batch size, :math:`P` is number of patches,\n        and :math:`C_{in}` is input embedding dim\n        - Output: same shape as the input\n    \"\"\"\n\n    def __init__(\n        self,\n        embed_dim: int,\n        ffn_latent_dim: int,\n        num_heads: Optional[int] = 8,\n        attn_dropout: Optional[float] = 0.0,\n        dropout: Optional[float] = 0.0,\n        ffn_dropout: Optional[float] = 0.0,\n        *args,\n        **kwargs\n    ) -> None:\n\n        super().__init__()\n\n        attn_unit = MultiHeadAttention(\n            embed_dim,\n            num_heads,\n            attn_dropout=attn_dropout,\n            bias=True\n        )\n\n        self.pre_norm_mha = nn.Sequential(\n            nn.LayerNorm(embed_dim),\n            attn_unit,\n            nn.Dropout(p=dropout)\n        )\n\n        self.pre_norm_ffn = nn.Sequential(\n            nn.LayerNorm(embed_dim),\n            nn.Linear(in_features=embed_dim, out_features=ffn_latent_dim, bias=True),\n            nn.SiLU(),\n            nn.Dropout(p=ffn_dropout),\n            nn.Linear(in_features=ffn_latent_dim, out_features=embed_dim, bias=True),\n            nn.Dropout(p=dropout)\n        )\n        self.embed_dim = embed_dim\n        self.ffn_dim = ffn_latent_dim\n        self.ffn_dropout = ffn_dropout\n        self.std_dropout = dropout\n\n    def forward(self, x: Tensor) -> Tensor:\n        # multi-head attention\n        res = x\n        x = self.pre_norm_mha(x)\n        x = x + res\n\n        # feed forward network\n        x = x + self.pre_norm_ffn(x)\n        return x\n"
  },
  {
    "path": "pytorch_classification/MobileViT/unfold_test.py",
    "content": "import time\nimport torch\n\nbatch_size = 8\nin_channels = 32\npatch_h = 2\npatch_w = 2\nnum_patch_h = 16\nnum_patch_w = 16\nnum_patches = num_patch_h * num_patch_w\npatch_area = patch_h * patch_w\n\n\ndef official(x: torch.Tensor):\n    # [B, C, H, W] -> [B * C * n_h, p_h, n_w, p_w]\n    x = x.reshape(batch_size * in_channels * num_patch_h, patch_h, num_patch_w, patch_w)\n    # [B * C * n_h, p_h, n_w, p_w] -> [B * C * n_h, n_w, p_h, p_w]\n    x = x.transpose(1, 2)\n    # [B * C * n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w\n    x = x.reshape(batch_size, in_channels, num_patches, patch_area)\n    # [B, C, N, P] -> [B, P, N, C]\n    x = x.transpose(1, 3)\n    # [B, P, N, C] -> [BP, N, C]\n    x = x.reshape(batch_size * patch_area, num_patches, -1)\n\n    return x\n\n\ndef my_self(x: torch.Tensor):\n    # [B, C, H, W] -> [B, C, n_h, p_h, n_w, p_w]\n    x = x.reshape(batch_size, in_channels, num_patch_h, patch_h, num_patch_w, patch_w)\n    # [B, C, n_h, p_h, n_w, p_w] -> [B, C, n_h, n_w, p_h, p_w]\n    x = x.transpose(3, 4)\n    # [B, C, n_h, n_w, p_h, p_w] -> [B, C, N, P] where P = p_h * p_w and N = n_h * n_w\n    x = x.reshape(batch_size, in_channels, num_patches, patch_area)\n    # [B, C, N, P] -> [B, P, N, C]\n    x = x.transpose(1, 3)\n    # [B, P, N, C] -> [BP, N, C]\n    x = x.reshape(batch_size * patch_area, num_patches, -1)\n\n    return x\n\n\nif __name__ == '__main__':\n    t = torch.randn(batch_size, in_channels, num_patch_h * patch_h, num_patch_w * patch_w)\n    print(torch.equal(official(t), my_self(t)))\n\n    t1 = time.time()\n    for _ in range(1000):\n        official(t)\n    print(f\"official time: {time.time() - t1}\")\n\n    t1 = time.time()\n    for _ in range(1000):\n        my_self(t)\n    print(f\"self time: {time.time() - t1}\")\n"
  },
  {
    "path": "pytorch_classification/MobileViT/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss(label_smoothing=0.1)\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = 
model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n"
  },
  {
    "path": "pytorch_classification/README.md",
    "content": "## 该文件夹存放使用pytorch实现的代码版本\n**model.py**： 是模型文件  \n**train.py**： 是调用模型训练的文件    \n**predict.py**： 是调用模型进行预测的文件  \n**class_indices.json**： 是训练数据集对应的标签文件   \n\n------\n若要使用该训练脚本需要下载对应的花分类数据集并将其划分为训练集和验证集。   \n[点击这里](../data_set/README.md)会告诉你如何去下载数据集，以及提供了现成的划分数据集脚本  "
  },
  {
    "path": "pytorch_classification/Test10_regnet/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  密码: 8lmu\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/model.py",
    "content": "from typing import Optional\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom torch import Tensor\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\ndef _mcfg(**kwargs):\n    cfg = dict(se_ratio=0., bottle_ratio=1., stem_width=32)\n    cfg.update(**kwargs)\n    return cfg\n\n\nmodel_cfgs = {\n    \"regnetx_200mf\": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13),\n    \"regnetx_400mf\": _mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22),\n    \"regnetx_600mf\": _mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16),\n    \"regnetx_800mf\": _mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16),\n    \"regnetx_1.6gf\": _mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18),\n    \"regnetx_3.2gf\": _mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25),\n    \"regnetx_4.0gf\": _mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23),\n    \"regnetx_6.4gf\": _mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17),\n    \"regnetx_8.0gf\": _mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23),\n    \"regnetx_12gf\": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19),\n    \"regnetx_16gf\": _mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22),\n    \"regnetx_32gf\": _mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23),\n    \"regnety_200mf\": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25),\n    \"regnety_400mf\": _mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, 
se_ratio=0.25),\n    \"regnety_600mf\": _mcfg(w0=48, wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25),\n    \"regnety_800mf\": _mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25),\n    \"regnety_1.6gf\": _mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25),\n    \"regnety_3.2gf\": _mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25),\n    \"regnety_4.0gf\": _mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25),\n    \"regnety_6.4gf\": _mcfg(w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25),\n    \"regnety_8.0gf\": _mcfg(w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25),\n    \"regnety_12gf\": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25),\n    \"regnety_16gf\": _mcfg(w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25),\n    \"regnety_32gf\": _mcfg(w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25)\n}\n\n\ndef generate_width_depth(wa, w0, wm, depth, q=8):\n    \"\"\"Generates per block widths from RegNet parameters.\"\"\"\n    assert wa > 0 and w0 > 0 and wm > 1 and w0 % q == 0\n    widths_cont = np.arange(depth) * wa + w0\n    width_exps = np.round(np.log(widths_cont / w0) / np.log(wm))\n    widths_j = w0 * np.power(wm, width_exps)\n    widths_j = np.round(np.divide(widths_j, q)) * q\n    num_stages, max_stage = len(np.unique(widths_j)), width_exps.max() + 1\n    assert num_stages == int(max_stage)\n    assert num_stages == 4\n    widths = widths_j.astype(int).tolist()\n    return widths, num_stages\n\n\ndef adjust_width_groups_comp(widths: list, groups: list):\n    \"\"\"Adjusts the compatibility of widths and groups.\"\"\"\n    groups = [min(g, w_bot) for g, w_bot in zip(groups, widths)]\n    # Adjust w to an integral multiple of g\n    widths = [int(round(w / g) * g) for w, g in zip(widths, groups)]\n    return widths, groups\n\n\nclass ConvBNAct(nn.Module):\n    def __init__(self,\n                 in_c: int,\n  
               out_c: int,\n                 kernel_s: int = 1,\n                 stride: int = 1,\n                 padding: int = 0,\n                 groups: int = 1,\n                 act: Optional[nn.Module] = nn.ReLU(inplace=True)):\n        super(ConvBNAct, self).__init__()\n\n        self.conv = nn.Conv2d(in_channels=in_c,\n                              out_channels=out_c,\n                              kernel_size=kernel_s,\n                              stride=stride,\n                              padding=padding,\n                              groups=groups,\n                              bias=False)\n\n        self.bn = nn.BatchNorm2d(out_c)\n        self.act = act if act is not None else nn.Identity()\n\n    def forward(self, x: Tensor) -> Tensor:\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.act(x)\n        return x\n\n\nclass RegHead(nn.Module):\n    def __init__(self,\n                 in_unit: int = 368,\n                 out_unit: int = 1000,\n                 output_size: tuple = (1, 1),\n                 drop_ratio: float = 0.25):\n        super(RegHead, self).__init__()\n        self.pool = nn.AdaptiveAvgPool2d(output_size)\n\n        if drop_ratio > 0:\n            self.dropout = nn.Dropout(p=drop_ratio)\n        else:\n            self.dropout = nn.Identity()\n\n        self.fc = nn.Linear(in_features=in_unit, out_features=out_unit)\n\n    def forward(self, x: Tensor) -> Tensor:\n        x = self.pool(x)\n        x = torch.flatten(x, start_dim=1)\n        x = self.dropout(x)\n        x = self.fc(x)\n        return x\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self, input_c: int, expand_c: int, se_ratio: float = 0.25):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = int(input_c * se_ratio)\n        self.fc1 = nn.Conv2d(expand_c, squeeze_c, 1)\n        self.ac1 = nn.ReLU(inplace=True)\n        self.fc2 = nn.Conv2d(squeeze_c, expand_c, 1)\n        self.ac2 = nn.Sigmoid()\n\n    def 
forward(self, x: Tensor) -> Tensor:\n        scale = x.mean((2, 3), keepdim=True)\n        scale = self.fc1(scale)\n        scale = self.ac1(scale)\n        scale = self.fc2(scale)\n        scale = self.ac2(scale)\n        return scale * x\n\n\nclass Bottleneck(nn.Module):\n    def __init__(self,\n                 in_c: int,\n                 out_c: int,\n                 stride: int = 1,\n                 group_width: int = 1,\n                 se_ratio: float = 0.,\n                 drop_ratio: float = 0.):\n        super(Bottleneck, self).__init__()\n\n        self.conv1 = ConvBNAct(in_c=in_c, out_c=out_c, kernel_s=1)\n        self.conv2 = ConvBNAct(in_c=out_c,\n                               out_c=out_c,\n                               kernel_s=3,\n                               stride=stride,\n                               padding=1,\n                               groups=out_c // group_width)\n\n        if se_ratio > 0:\n            self.se = SqueezeExcitation(in_c, out_c, se_ratio)\n        else:\n            self.se = nn.Identity()\n\n        self.conv3 = ConvBNAct(in_c=out_c, out_c=out_c, kernel_s=1, act=None)\n        self.ac3 = nn.ReLU(inplace=True)\n\n        if drop_ratio > 0:\n            self.dropout = nn.Dropout(p=drop_ratio)\n        else:\n            self.dropout = nn.Identity()\n\n        if (in_c != out_c) or (stride != 1):\n            self.downsample = ConvBNAct(in_c=in_c, out_c=out_c, kernel_s=1, stride=stride, act=None)\n        else:\n            self.downsample = nn.Identity()\n\n    def zero_init_last_bn(self):\n        nn.init.zeros_(self.conv3.bn.weight)\n\n    def forward(self, x: Tensor) -> Tensor:\n        shortcut = x\n        x = self.conv1(x)\n        x = self.conv2(x)\n\n        x = self.se(x)\n        x = self.conv3(x)\n\n        x = self.dropout(x)\n\n        shortcut = self.downsample(shortcut)\n\n        x += shortcut\n        x = self.ac3(x)\n        return x\n\n\nclass RegStage(nn.Module):\n    def __init__(self,\n        
         in_c: int,\n                 out_c: int,\n                 depth: int,\n                 group_width: int,\n                 se_ratio: float):\n        super(RegStage, self).__init__()\n        for i in range(depth):\n            block_stride = 2 if i == 0 else 1\n            block_in_c = in_c if i == 0 else out_c\n\n            name = \"b{}\".format(i + 1)\n            self.add_module(name,\n                            Bottleneck(in_c=block_in_c,\n                                       out_c=out_c,\n                                       stride=block_stride,\n                                       group_width=group_width,\n                                       se_ratio=se_ratio))\n\n    def forward(self, x: Tensor) -> Tensor:\n        for block in self.children():\n            x = block(x)\n        return x\n\n\nclass RegNet(nn.Module):\n    \"\"\"RegNet model.\n\n    Paper: https://arxiv.org/abs/2003.13678\n    Original Impl: https://github.com/facebookresearch/pycls/blob/master/pycls/models/regnet.py\n    and refer to: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/regnet.py\n    \"\"\"\n\n    def __init__(self,\n                 cfg: dict,\n                 in_c: int = 3,\n                 num_classes: int = 1000,\n                 zero_init_last_bn: bool = True):\n        super(RegNet, self).__init__()\n\n        # RegStem\n        stem_c = cfg[\"stem_width\"]\n        self.stem = ConvBNAct(in_c, out_c=stem_c, kernel_s=3, stride=2, padding=1)\n\n        # build stages\n        input_channels = stem_c\n        stage_info = self._build_stage_info(cfg)\n        for i, stage_args in enumerate(stage_info):\n            stage_name = \"s{}\".format(i + 1)\n            self.add_module(stage_name, RegStage(in_c=input_channels, **stage_args))\n            input_channels = stage_args[\"out_c\"]\n\n        # RegHead\n        self.head = RegHead(in_unit=input_channels, out_unit=num_classes)\n\n        # initial weights\n        for m in 
self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_uniform_(m.weight, mode=\"fan_out\",  nonlinearity='relu')\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, mean=0.0, std=0.01)\n                nn.init.zeros_(m.bias)\n\n        if zero_init_last_bn:\n            for m in self.modules():\n                if hasattr(m, \"zero_init_last_bn\"):\n                    m.zero_init_last_bn()\n\n    def forward(self, x: Tensor) -> Tensor:\n        for layer in self.children():\n            x = layer(x)\n        return x\n\n    @staticmethod\n    def _build_stage_info(cfg: dict):\n        wa, w0, wm, d = cfg[\"wa\"], cfg[\"w0\"], cfg[\"wm\"], cfg[\"depth\"]\n        widths, num_stages = generate_width_depth(wa, w0, wm, d)\n\n        stage_widths, stage_depths = np.unique(widths, return_counts=True)\n        stage_groups = [cfg['group_w'] for _ in range(num_stages)]\n        stage_widths, stage_groups = adjust_width_groups_comp(stage_widths, stage_groups)\n\n        info = []\n        for i in range(num_stages):\n            info.append(dict(out_c=stage_widths[i],\n                             depth=stage_depths[i],\n                             group_width=stage_groups[i],\n                             se_ratio=cfg[\"se_ratio\"]))\n\n        return info\n\n\ndef create_regnet(model_name=\"RegNetX_200MF\", num_classes=1000):\n    model_name = model_name.lower().replace(\"-\", \"_\")\n    if model_name not in model_cfgs.keys():\n        print(\"support model name: \\n{}\".format(\"\\n\".join(model_cfgs.keys())))\n        raise KeyError(\"not support model name: {}\".format(model_name))\n\n    model = RegNet(cfg=model_cfgs[model_name], num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import create_regnet\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_regnet(model_name=\"RegNetY_400MF\", num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-29.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/pretrain_weights.py",
    "content": "import requests\n\n\ndownload_links = {\n    \"regnetx_200mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth',\n    \"regnetx_400mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth',\n    \"regnetx_600mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth',\n    \"regnetx_800mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth',\n    \"regnetx_1.6gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth',\n    \"regnetx_3.2gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth',\n    \"regnetx_4.0gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth',\n    \"regnetx_6.4gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth',\n    \"regnetx_8.0gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth',\n    \"regnetx_12gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth',\n    \"regnetx_16gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth',\n    \"regnetx_32gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth',\n    \"regnety_200mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth',\n    \"regnety_400mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth',\n    \"regnety_600mf\": 
'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth',\n    \"regnety_800mf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth',\n    \"regnety_1.6gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth',\n    \"regnety_3.2gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth',\n    \"regnety_4.0gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth',\n    \"regnety_6.4gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth',\n    \"regnety_8.0gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth',\n    \"regnety_12gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth',\n    \"regnety_16gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth',\n    \"regnety_32gf\": 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth',\n}\n\n\ndef main():\n    model_name = \"regnetx_400mf\"\n    print(\"download weights name: \" + model_name)\n\n    if model_name not in download_links.keys():\n        raise KeyError(\"{} not in download_links\".format(model_name))\n\n    headers = {\"Content-Type\": \"application/json\",\n               \"Connection\": \"close\",\n               \"User-Agent\": \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0\"}\n\n    save_weights = \"./\" + model_name + \".pth\"\n\n    req = requests.get(url=download_links[model_name],\n                       stream=True, headers=headers, timeout=10)\n    req.raise_for_status()\n    info = 
int(req.headers[\"Content-Length\"])\n\n    accumulate_data = 0\n    with open(save_weights, \"wb\") as f:\n        for data in req.iter_content(2048):\n            f.write(data)\n            accumulate_data += 2048\n            print(\"\\rdownload: [{}Mb/{}Mb] {}%\".format(int(accumulate_data / 1024 / 1024),\n                                                       int(info / 1024 / 1024),\n                                                       int(accumulate_data / info * 100)), end=\"\")\n    req.close()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import create_regnet\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    
print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = create_regnet(model_name=args.model_name,\n                          num_classes=args.num_classes).to(device)\n    # print(model)\n\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            weights_dict = torch.load(args.weights, map_location=device)\n            load_weights_dict = {k: v for k, v in weights_dict.items()\n                                 if model.state_dict()[k].numel() == v.numel()}\n            print(model.load_state_dict(load_weights_dict, strict=False))\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"head\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"train {}\".format(name))\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = 
lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.01)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n    parser.add_argument('--model-name', default='RegNetY_400MF', help='create model name')\n\n    # 预训练权重下载地址\n    # 链接: https://pan.baidu.com/s/1XTo3walj9ai7ZhWz7jh-YA  密码: 8lmu\n    parser.add_argument('--weights', type=str, default='regnety_400mf.pth',\n                        help='initial weights path')\n    
parser.add_argument('--freeze-layers', type=lambda s: s.lower() in ('true', '1', 'yes'), default=False,\n                        help='freeze all weights except the head (true/false)')\n    parser.add_argument('--device', default='cuda:0', help='device to use (e.g. cuda:0 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/Test10_regnet/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 验证样本总个数\n    total_num = len(data_loader.dataset)\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    return sum_num.item() / total_num\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ  密码: 5gu1\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型(同时保证两个脚本中的`num_model`与所用模型`s`/`m`/`l`一致，它决定输入图像尺寸)，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/model.py",
    "content": "from collections import OrderedDict\nfrom functools import partial\nfrom typing import Callable, Optional\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n\n    This function is taken from the rwightman.\n    It can be seen here:\n    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140\n    \"\"\"\n    if drop_prob == 0. or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass ConvBNAct(nn.Module):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None):\n        super(ConvBNAct, self).__init__()\n\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = 
nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.SiLU  # alias Swish  (torch>=1.7)\n\n        self.conv = nn.Conv2d(in_channels=in_planes,\n                              out_channels=out_planes,\n                              kernel_size=kernel_size,\n                              stride=stride,\n                              padding=padding,\n                              groups=groups,\n                              bias=False)\n\n        self.bn = norm_layer(out_planes)\n        self.act = activation_layer()\n\n    def forward(self, x):\n        result = self.conv(x)\n        result = self.bn(result)\n        result = self.act(result)\n\n        return result\n\n\nclass SqueezeExcite(nn.Module):\n    def __init__(self,\n                 input_c: int,   # block input channel\n                 expand_c: int,  # block expand channel\n                 se_ratio: float = 0.25):\n        super(SqueezeExcite, self).__init__()\n        squeeze_c = int(input_c * se_ratio)\n        self.conv_reduce = nn.Conv2d(expand_c, squeeze_c, 1)\n        self.act1 = nn.SiLU()  # alias Swish\n        self.conv_expand = nn.Conv2d(squeeze_c, expand_c, 1)\n        self.act2 = nn.Sigmoid()\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = x.mean((2, 3), keepdim=True)\n        scale = self.conv_reduce(scale)\n        scale = self.act1(scale)\n        scale = self.conv_expand(scale)\n        scale = self.act2(scale)\n        return scale * x\n\n\nclass MBConv(nn.Module):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float,\n                 drop_rate: float,\n                 norm_layer: Callable[..., nn.Module]):\n        super(MBConv, self).__init__()\n\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        
self.has_shortcut = (stride == 1 and input_c == out_c)\n\n        activation_layer = nn.SiLU  # alias Swish\n        expanded_c = input_c * expand_ratio\n\n        # 在EfficientNetV2中，MBConv中不存在expansion=1的情况所以conv_pw肯定存在\n        assert expand_ratio != 1\n        # Point-wise expansion\n        self.expand_conv = ConvBNAct(input_c,\n                                     expanded_c,\n                                     kernel_size=1,\n                                     norm_layer=norm_layer,\n                                     activation_layer=activation_layer)\n\n        # Depth-wise convolution\n        self.dwconv = ConvBNAct(expanded_c,\n                                expanded_c,\n                                kernel_size=kernel_size,\n                                stride=stride,\n                                groups=expanded_c,\n                                norm_layer=norm_layer,\n                                activation_layer=activation_layer)\n\n        self.se = SqueezeExcite(input_c, expanded_c, se_ratio) if se_ratio > 0 else nn.Identity()\n\n        # Point-wise linear projection\n        self.project_conv = ConvBNAct(expanded_c,\n                                      out_planes=out_c,\n                                      kernel_size=1,\n                                      norm_layer=norm_layer,\n                                      activation_layer=nn.Identity)  # 注意这里没有激活函数，所有传入Identity\n\n        self.out_channels = out_c\n\n        # 只有在使用shortcut连接时才使用dropout层\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            self.dropout = DropPath(drop_rate)\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.expand_conv(x)\n        result = self.dwconv(result)\n        result = self.se(result)\n        result = self.project_conv(result)\n\n        if self.has_shortcut:\n            if self.drop_rate > 0:\n                result = self.dropout(result)\n            result += x\n\n  
      return result\n\n\nclass FusedMBConv(nn.Module):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float,\n                 drop_rate: float,\n                 norm_layer: Callable[..., nn.Module]):\n        super(FusedMBConv, self).__init__()\n\n        assert stride in [1, 2]\n        assert se_ratio == 0\n\n        self.has_shortcut = stride == 1 and input_c == out_c\n        self.drop_rate = drop_rate\n\n        self.has_expansion = expand_ratio != 1\n\n        activation_layer = nn.SiLU  # alias Swish\n        expanded_c = input_c * expand_ratio\n\n        # 只有当expand ratio不等于1时才有expand conv\n        if self.has_expansion:\n            # Expansion convolution\n            self.expand_conv = ConvBNAct(input_c,\n                                         expanded_c,\n                                         kernel_size=kernel_size,\n                                         stride=stride,\n                                         norm_layer=norm_layer,\n                                         activation_layer=activation_layer)\n\n            self.project_conv = ConvBNAct(expanded_c,\n                                          out_c,\n                                          kernel_size=1,\n                                          norm_layer=norm_layer,\n                                          activation_layer=nn.Identity)  # 注意没有激活函数\n        else:\n            # 当只有project_conv时的情况\n            self.project_conv = ConvBNAct(input_c,\n                                          out_c,\n                                          kernel_size=kernel_size,\n                                          stride=stride,\n                                          norm_layer=norm_layer,\n                                          activation_layer=activation_layer)  # 注意有激活函数\n\n        self.out_channels = 
out_c\n\n        # 只有在使用shortcut连接时才使用dropout层\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            self.dropout = DropPath(drop_rate)\n\n    def forward(self, x: Tensor) -> Tensor:\n        if self.has_expansion:\n            result = self.expand_conv(x)\n            result = self.project_conv(result)\n        else:\n            result = self.project_conv(x)\n\n        if self.has_shortcut:\n            if self.drop_rate > 0:\n                result = self.dropout(result)\n\n            result += x\n\n        return result\n\n\nclass EfficientNetV2(nn.Module):\n    def __init__(self,\n                 model_cnf: list,\n                 num_classes: int = 1000,\n                 num_features: int = 1280,\n                 dropout_rate: float = 0.2,\n                 drop_connect_rate: float = 0.2):\n        super(EfficientNetV2, self).__init__()\n\n        for cnf in model_cnf:\n            assert len(cnf) == 8\n\n        norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)\n\n        stem_filter_num = model_cnf[0][4]\n\n        self.stem = ConvBNAct(3,\n                              stem_filter_num,\n                              kernel_size=3,\n                              stride=2,\n                              norm_layer=norm_layer)  # 激活函数默认是SiLU\n\n        total_blocks = sum([i[0] for i in model_cnf])\n        block_id = 0\n        blocks = []\n        for cnf in model_cnf:\n            repeats = cnf[0]\n            op = FusedMBConv if cnf[-2] == 0 else MBConv\n            for i in range(repeats):\n                blocks.append(op(kernel_size=cnf[1],\n                                 input_c=cnf[4] if i == 0 else cnf[5],\n                                 out_c=cnf[5],\n                                 expand_ratio=cnf[3],\n                                 stride=cnf[2] if i == 0 else 1,\n                                 se_ratio=cnf[-1],\n                                 drop_rate=drop_connect_rate * 
block_id / total_blocks,\n                                 norm_layer=norm_layer))\n                block_id += 1\n        self.blocks = nn.Sequential(*blocks)\n\n        head_input_c = model_cnf[-1][-3]\n        head = OrderedDict()\n\n        head.update({\"project_conv\": ConvBNAct(head_input_c,\n                                               num_features,\n                                               kernel_size=1,\n                                               norm_layer=norm_layer)})  # 激活函数默认是SiLU\n\n        head.update({\"avgpool\": nn.AdaptiveAvgPool2d(1)})\n        head.update({\"flatten\": nn.Flatten()})\n\n        if dropout_rate > 0:\n            head.update({\"dropout\": nn.Dropout(p=dropout_rate, inplace=True)})\n        head.update({\"classifier\": nn.Linear(num_features, num_classes)})\n\n        self.head = nn.Sequential(head)\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def forward(self, x: Tensor) -> Tensor:\n        x = self.stem(x)\n        x = self.blocks(x)\n        x = self.head(x)\n\n        return x\n\n\ndef efficientnetv2_s(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 300, eval_size: 384\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[2, 3, 1, 1, 24, 24, 0, 0],\n                    [4, 3, 2, 4, 24, 48, 0, 0],\n                    [4, 3, 2, 4, 48, 64, 0, 0],\n                    [6, 3, 2, 4, 64, 128, 1, 0.25],\n                   
 [9, 3, 1, 6, 128, 160, 1, 0.25],\n                    [15, 3, 2, 6, 160, 256, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.2)\n    return model\n\n\ndef efficientnetv2_m(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[3, 3, 1, 1, 24, 24, 0, 0],\n                    [5, 3, 2, 4, 24, 48, 0, 0],\n                    [5, 3, 2, 4, 48, 80, 0, 0],\n                    [7, 3, 2, 4, 80, 160, 1, 0.25],\n                    [14, 3, 1, 6, 160, 176, 1, 0.25],\n                    [18, 3, 2, 6, 176, 304, 1, 0.25],\n                    [5, 3, 1, 6, 304, 512, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.3)\n    return model\n\n\ndef efficientnetv2_l(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[4, 3, 1, 1, 32, 32, 0, 0],\n                    [7, 3, 2, 4, 32, 64, 0, 0],\n                    [7, 3, 2, 4, 64, 96, 0, 0],\n                    [10, 3, 2, 4, 96, 192, 1, 0.25],\n                    [19, 3, 1, 6, 192, 224, 1, 0.25],\n                    [25, 3, 2, 6, 224, 384, 1, 0.25],\n                    [7, 3, 1, 6, 384, 640, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.4)\n    return model\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import efficientnetv2_s as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    img_size = {\"s\": [300, 384],  # train_size, val_size\n                \"m\": [384, 480],\n                \"l\": [384, 480]}\n    num_model = \"s\"\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(img_size[num_model][1]),\n         transforms.CenterCrop(img_size[num_model][1]),\n         transforms.ToTensor(),\n         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-29.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: 
{:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import efficientnetv2_s as create_model\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = {\"s\": [300, 384],  # train_size, val_size\n                \"m\": [384, 480],\n                \"l\": [384, 480]}\n    num_model = \"s\"\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model][0]),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),\n        \"val\": transforms.Compose([transforms.Resize(img_size[num_model][1]),\n                                   transforms.CenterCrop(img_size[num_model][1]),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            
images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = create_model(num_classes=args.num_classes).to(device)\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            weights_dict = torch.load(args.weights, map_location=device)\n            load_weights_dict = {k: v for k, v in weights_dict.items()\n                                 if model.state_dict()[k].numel() == v.numel()}\n            print(model.load_state_dict(load_weights_dict, strict=False))\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除head外，其他权重全部冻结\n            if \"head\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    pg = [p for p in model.parameters() if 
p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        train_loss, train_acc = train_one_epoch(model=model,\n                                                optimizer=optimizer,\n                                                data_loader=train_loader,\n                                                device=device,\n                                                epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        val_loss, val_acc = evaluate(model=model,\n                                     data_loader=val_loader,\n                                     device=device,\n                                     epoch=epoch)\n\n        tags = [\"train_loss\", \"train_acc\", \"val_loss\", \"val_acc\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], train_loss, epoch)\n        tb_writer.add_scalar(tags[1], train_acc, epoch)\n        tb_writer.add_scalar(tags[2], val_loss, epoch)\n        tb_writer.add_scalar(tags[3], val_acc, epoch)\n        tb_writer.add_scalar(tags[4], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=0.01)\n    parser.add_argument('--lrf', type=float, default=0.01)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', 
type=str,\n                        default=\"/data/flower_photos\")\n\n    # download model weights\n    # 链接: https://pan.baidu.com/s/1uZX36rvrfEss-JGj4yfzbQ  密码: 5gu1\n    parser.add_argument('--weights', type=str, default='./pre_efficientnetv2-s.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=True)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/trans_effv2_weights.py",
    "content": "import tensorflow as tf\nimport torch\nimport numpy as np\n\n\ndef main(model_name: str = \"efficientnetv2-s\",\n         tf_weights_path: str = \"./efficientnetv2-s/model\",\n         stage0_num: int = 2,\n         fused_conv_num: int = 10):\n\n    except_var = [\"global_step\"]\n\n    new_weights = {}\n    var_list = [i for i in tf.train.list_variables(tf_weights_path) if \"Exponential\" not in i[0]]\n    reader = tf.train.load_checkpoint(tf_weights_path)\n    for v in var_list:\n        if v[0] in except_var:\n            continue\n        new_name = v[0].replace(model_name + \"/\", \"\").replace(\"/\", \".\")\n\n        if \"stem\" in v[0]:\n            new_name = new_name.replace(\"conv2d.kernel\",\n                                        \"conv.weight\")\n\n            new_name = new_name.replace(\"tpu_batch_normalization.beta\",\n                                        \"bn.bias\")\n            new_name = new_name.replace(\"tpu_batch_normalization.gamma\",\n                                        \"bn.weight\")\n            new_name = new_name.replace(\"tpu_batch_normalization.moving_mean\",\n                                        \"bn.running_mean\")\n            new_name = new_name.replace(\"tpu_batch_normalization.moving_variance\",\n                                        \"bn.running_var\")\n        elif \"head\" in v[0]:\n            new_name = new_name.replace(\"conv2d.kernel\",\n                                        \"project_conv.conv.weight\")\n            new_name = new_name.replace(\"dense.kernel\",\n                                        \"classifier.weight\")\n            new_name = new_name.replace(\"dense.bias\",\n                                        \"classifier.bias\")\n\n            new_name = new_name.replace(\"tpu_batch_normalization.beta\",\n                                        \"project_conv.bn.bias\")\n            new_name = new_name.replace(\"tpu_batch_normalization.gamma\",\n                                 
       \"project_conv.bn.weight\")\n            new_name = new_name.replace(\"tpu_batch_normalization.moving_mean\",\n                                        \"project_conv.bn.running_mean\")\n            new_name = new_name.replace(\"tpu_batch_normalization.moving_variance\",\n                                        \"project_conv.bn.running_var\")\n        elif \"blocks\" in v[0]:\n            # e.g. blocks_0.conv2d.kernel -> 0\n            blocks_id = new_name.split(\".\", maxsplit=1)[0].replace(\"blocks_\", \"\")\n            new_name = new_name.replace(\"blocks_{}\".format(blocks_id),\n                                        \"blocks.{}\".format(blocks_id))\n\n            if int(blocks_id) <= stage0_num - 1:  # expansion=1 fused_mbconv\n                new_name = new_name.replace(\"conv2d.kernel\",\n                                            \"project_conv.conv.weight\")\n                new_name = new_name.replace(\"tpu_batch_normalization.beta\",\n                                            \"project_conv.bn.bias\")\n                new_name = new_name.replace(\"tpu_batch_normalization.gamma\",\n                                            \"project_conv.bn.weight\")\n                new_name = new_name.replace(\"tpu_batch_normalization.moving_mean\",\n                                            \"project_conv.bn.running_mean\")\n                new_name = new_name.replace(\"tpu_batch_normalization.moving_variance\",\n                                            \"project_conv.bn.running_var\")\n            else:\n                new_name = new_name.replace(\"blocks.{}.conv2d.kernel\".format(blocks_id),\n                                            \"blocks.{}.expand_conv.conv.weight\".format(blocks_id))\n                new_name = new_name.replace(\"tpu_batch_normalization.beta\",\n                                            \"expand_conv.bn.bias\")\n                new_name = new_name.replace(\"tpu_batch_normalization.gamma\",\n                               
             \"expand_conv.bn.weight\")\n                new_name = new_name.replace(\"tpu_batch_normalization.moving_mean\",\n                                            \"expand_conv.bn.running_mean\")\n                new_name = new_name.replace(\"tpu_batch_normalization.moving_variance\",\n                                            \"expand_conv.bn.running_var\")\n\n                if int(blocks_id) <= fused_conv_num - 1:  # fused_mbconv\n                    new_name = new_name.replace(\"blocks.{}.conv2d_1.kernel\".format(blocks_id),\n                                                \"blocks.{}.project_conv.conv.weight\".format(blocks_id))\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.beta\",\n                                                \"project_conv.bn.bias\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.gamma\",\n                                                \"project_conv.bn.weight\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.moving_mean\",\n                                                \"project_conv.bn.running_mean\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.moving_variance\",\n                                                \"project_conv.bn.running_var\")\n                else:  # mbconv\n                    new_name = new_name.replace(\"blocks.{}.conv2d_1.kernel\".format(blocks_id),\n                                                \"blocks.{}.project_conv.conv.weight\".format(blocks_id))\n\n                    new_name = new_name.replace(\"depthwise_conv2d.depthwise_kernel\",\n                                                \"dwconv.conv.weight\")\n\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.beta\",\n                                                \"dwconv.bn.bias\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.gamma\",\n                            
                    \"dwconv.bn.weight\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.moving_mean\",\n                                                \"dwconv.bn.running_mean\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_1.moving_variance\",\n                                                \"dwconv.bn.running_var\")\n\n                    new_name = new_name.replace(\"tpu_batch_normalization_2.beta\",\n                                                \"project_conv.bn.bias\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_2.gamma\",\n                                                \"project_conv.bn.weight\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_2.moving_mean\",\n                                                \"project_conv.bn.running_mean\")\n                    new_name = new_name.replace(\"tpu_batch_normalization_2.moving_variance\",\n                                                \"project_conv.bn.running_var\")\n\n                    new_name = new_name.replace(\"se.conv2d.bias\",\n                                                \"se.conv_reduce.bias\")\n                    new_name = new_name.replace(\"se.conv2d.kernel\",\n                                                \"se.conv_reduce.weight\")\n                    new_name = new_name.replace(\"se.conv2d_1.bias\",\n                                                \"se.conv_expand.bias\")\n                    new_name = new_name.replace(\"se.conv2d_1.kernel\",\n                                                \"se.conv_expand.weight\")\n        else:\n            print(\"not recognized name: \" + v[0])\n\n        var = reader.get_tensor(v[0])\n        new_var = var\n        if \"conv\" in new_name and \"weight\" in new_name and \"bn\" not in new_name and \"dw\" not in new_name:\n            assert len(var.shape) == 4\n            # conv kernel [h, w, c, n] -> [n, c, h, w]\n         
   new_var = np.transpose(var, (3, 2, 0, 1))\n        elif \"bn\" in new_name:\n            pass\n        elif \"dwconv\" in new_name and \"weight\" in new_name:\n            # dw_kernel [h, w, n, c] -> [n, c, h, w]\n            assert len(var.shape) == 4\n            new_var = np.transpose(var, (2, 3, 0, 1))\n        elif \"classifier\" in new_name and \"weight\" in new_name:\n            assert len(var.shape) == 2\n            new_var = np.transpose(var, (1, 0))\n\n        new_weights[new_name] = torch.as_tensor(new_var)\n\n    torch.save(new_weights, \"pre_\" + model_name + \".pth\")\n\n\nif __name__ == '__main__':\n    main(model_name=\"efficientnetv2-s\",\n         tf_weights_path=\"./efficientnetv2-s/model\",\n         stage0_num=2,\n         fused_conv_num=10)\n\n    # main(model_name=\"efficientnetv2-m\",\n    #      tf_weights_path=\"./efficientnetv2-m/model\",\n    #      stage0_num=3,\n    #      fused_conv_num=13)\n\n    # main(model_name=\"efficientnetv2-l\",\n    #      tf_weights_path=\"./efficientnetv2-l/model\",\n    #      stage0_num=4,\n    #      fused_conv_num=18)\n"
  },
  {
    "path": "pytorch_classification/Test11_efficientnetV2/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = 
model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n"
  },
  {
    "path": "pytorch_classification/Test1_official_demo/model.py",
    "content": "import torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass LeNet(nn.Module):\n    def __init__(self):\n        super(LeNet, self).__init__()\n        self.conv1 = nn.Conv2d(3, 16, 5)\n        self.pool1 = nn.MaxPool2d(2, 2)\n        self.conv2 = nn.Conv2d(16, 32, 5)\n        self.pool2 = nn.MaxPool2d(2, 2)\n        self.fc1 = nn.Linear(32*5*5, 120)\n        self.fc2 = nn.Linear(120, 84)\n        self.fc3 = nn.Linear(84, 10)\n\n    def forward(self, x):\n        x = F.relu(self.conv1(x))    # input(3, 32, 32) output(16, 28, 28)\n        x = self.pool1(x)            # output(16, 14, 14)\n        x = F.relu(self.conv2(x))    # output(32, 10, 10)\n        x = self.pool2(x)            # output(32, 5, 5)\n        x = x.view(-1, 32*5*5)       # output(32*5*5)\n        x = F.relu(self.fc1(x))      # output(120)\n        x = F.relu(self.fc2(x))      # output(84)\n        x = self.fc3(x)              # output(10)\n        return x\n\n\n"
  },
  {
    "path": "pytorch_classification/Test1_official_demo/predict.py",
    "content": "import torch\nimport torchvision.transforms as transforms\nfrom PIL import Image\n\nfrom model import LeNet\n\n\ndef main():\n    transform = transforms.Compose(\n        [transforms.Resize((32, 32)),\n         transforms.ToTensor(),\n         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n    classes = ('plane', 'car', 'bird', 'cat',\n               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n\n    net = LeNet()\n    net.load_state_dict(torch.load('Lenet.pth'))\n\n    im = Image.open('1.jpg')\n    im = transform(im)  # [C, H, W]\n    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]\n\n    with torch.no_grad():\n        outputs = net(im)\n        predict = torch.max(outputs, dim=1)[1].numpy()\n    print(classes[int(predict)])\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test1_official_demo/train.py",
    "content": "import torch\nimport torchvision\nimport torch.nn as nn\nfrom model import LeNet\nimport torch.optim as optim\nimport torchvision.transforms as transforms\n\n\ndef main():\n    transform = transforms.Compose(\n        [transforms.ToTensor(),\n         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n    # 50000张训练图片\n    # 第一次使用时要将download设置为True才会自动去下载数据集\n    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,\n                                             download=False, transform=transform)\n    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,\n                                               shuffle=True, num_workers=0)\n\n    # 10000张验证图片\n    # 第一次使用时要将download设置为True才会自动去下载数据集\n    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,\n                                           download=False, transform=transform)\n    val_loader = torch.utils.data.DataLoader(val_set, batch_size=5000,\n                                             shuffle=False, num_workers=0)\n    val_data_iter = iter(val_loader)\n    val_image, val_label = next(val_data_iter)\n    \n    # classes = ('plane', 'car', 'bird', 'cat',\n    #            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n\n    net = LeNet()\n    loss_function = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(net.parameters(), lr=0.001)\n\n    for epoch in range(5):  # loop over the dataset multiple times\n\n        running_loss = 0.0\n        for step, data in enumerate(train_loader, start=0):\n            # get the inputs; data is a list of [inputs, labels]\n            inputs, labels = data\n\n            # zero the parameter gradients\n            optimizer.zero_grad()\n            # forward + backward + optimize\n            outputs = net(inputs)\n            loss = loss_function(outputs, labels)\n            loss.backward()\n            optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n        
    if step % 500 == 499:    # print every 500 mini-batches\n                with torch.no_grad():\n                    outputs = net(val_image)  # [batch, 10]\n                    predict_y = torch.max(outputs, dim=1)[1]\n                    accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)\n\n                    print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %\n                          (epoch + 1, step + 1, running_loss / 500, accuracy))\n                    running_loss = 0.0\n\n    print('Finished Training')\n\n    save_path = './Lenet.pth'\n    torch.save(net.state_dict(), save_path)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test2_alexnet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test2_alexnet/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass AlexNet(nn.Module):\n    def __init__(self, num_classes=1000, init_weights=False):\n        super(AlexNet, self).__init__()\n        self.features = nn.Sequential(\n            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]\n            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]\n            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]\n        )\n        self.classifier = nn.Sequential(\n            nn.Dropout(p=0.5),\n            nn.Linear(128 * 6 * 6, 2048),\n            nn.ReLU(inplace=True),\n            nn.Dropout(p=0.5),\n            nn.Linear(2048, 2048),\n            nn.ReLU(inplace=True),\n            nn.Linear(2048, num_classes),\n        )\n        if init_weights:\n            self._initialize_weights()\n\n    def forward(self, x):\n        x = self.features(x)\n        x = torch.flatten(x, start_dim=1)\n        x = self.classifier(x)\n        return x\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            
elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n"
  },
  {
    "path": "pytorch_classification/Test2_alexnet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import AlexNet\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize((224, 224)),\n         transforms.ToTensor(),\n         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = AlexNet(num_classes=5).to(device)\n\n    # load model weights\n    weights_path = \"./AlexNet.pth\"\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path))\n\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test2_alexnet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport torch\nimport torch.nn as nn\nfrom torchvision import transforms, datasets, utils\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport torch.optim as optim\nfrom tqdm import tqdm\n\nfrom model import AlexNet\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),\n        \"val\": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)\n                                   transforms.ToTensor(),\n                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"{} path does not exist.\".format(image_path)\n    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"train\"),\n                                         transform=data_transform[\"train\"])\n    train_num = len(train_dataset)\n\n    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\n    flower_list = train_dataset.class_to_idx\n    cla_dict = dict((val, key) for key, val in flower_list.items())\n    # write dict into json file\n    json_str = json.dumps(cla_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    batch_size = 32\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader 
workers every process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size, shuffle=True,\n                                               num_workers=nw)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform[\"val\"])\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=4, shuffle=False,\n                                                  num_workers=nw)\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n    # test_data_iter = iter(validate_loader)\n    # test_image, test_label = test_data_iter.next()\n    #\n    # def imshow(img):\n    #     img = img / 2 + 0.5  # unnormalize\n    #     npimg = img.numpy()\n    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))\n    #     plt.show()\n    #\n    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))\n    # imshow(utils.make_grid(test_image))\n\n    net = AlexNet(num_classes=5, init_weights=True)\n\n    net.to(device)\n    loss_function = nn.CrossEntropyLoss()\n    # pata = list(net.parameters())\n    optimizer = optim.Adam(net.parameters(), lr=0.0002)\n\n    epochs = 10\n    save_path = './AlexNet.pth'\n    best_acc = 0.0\n    train_steps = len(train_loader)\n    for epoch in range(epochs):\n        # train\n        net.train()\n        running_loss = 0.0\n        train_bar = tqdm(train_loader, file=sys.stdout)\n        for step, data in enumerate(train_bar):\n            images, labels = data\n            optimizer.zero_grad()\n            outputs = net(images.to(device))\n            loss = loss_function(outputs, labels.to(device))\n            loss.backward()\n   
         optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}\".format(epoch + 1,\n                                                                     epochs,\n                                                                     loss)\n\n        # validate\n        net.eval()\n        acc = 0.0  # accumulate accurate number / epoch\n        with torch.no_grad():\n            val_bar = tqdm(validate_loader, file=sys.stdout)\n            for val_data in val_bar:\n                val_images, val_labels = val_data\n                outputs = net(val_images.to(device))\n                predict_y = torch.max(outputs, dim=1)[1]\n                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()\n\n        val_accurate = acc / val_num\n        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %\n              (epoch + 1, running_loss / train_steps, val_accurate))\n\n        if val_accurate > best_acc:\n            best_acc = val_accurate\n            torch.save(net.state_dict(), save_path)\n\n    print('Finished Training')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test3_vggnet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test3_vggnet/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n# official pretrain weights\nmodel_urls = {\n    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',\n    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',\n    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',\n    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'\n}\n\n\nclass VGG(nn.Module):\n    def __init__(self, features, num_classes=1000, init_weights=False):\n        super(VGG, self).__init__()\n        self.features = features\n        self.classifier = nn.Sequential(\n            nn.Linear(512*7*7, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, num_classes)\n        )\n        if init_weights:\n            self._initialize_weights()\n\n    def forward(self, x):\n        # N x 3 x 224 x 224\n        x = self.features(x)\n        # N x 512 x 7 x 7\n        x = torch.flatten(x, start_dim=1)\n        # N x 512*7*7\n        x = self.classifier(x)\n        return x\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                nn.init.xavier_uniform_(m.weight)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.xavier_uniform_(m.weight)\n                # nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n\n\ndef make_features(cfg: list):\n    layers = []\n    in_channels = 3\n    for v in cfg:\n        if v == \"M\":\n            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]\n        else:\n            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)\n            layers += [conv2d, 
nn.ReLU(True)]\n            in_channels = v\n    return nn.Sequential(*layers)\n\n\ncfgs = {\n    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n}\n\n\ndef vgg(model_name=\"vgg16\", **kwargs):\n    assert model_name in cfgs, \"Warning: model number {} not in cfgs dict!\".format(model_name)\n    cfg = cfgs[model_name]\n\n    model = VGG(make_features(cfg), **kwargs)\n    return model\n"
  },
  {
    "path": "pytorch_classification/Test3_vggnet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import vgg\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize((224, 224)),\n         transforms.ToTensor(),\n         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n    \n    # create model\n    model = vgg(model_name=\"vgg16\", num_classes=5).to(device)\n    # load model weights\n    weights_path = \"./vgg16Net.pth\"\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location=device))\n\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test3_vggnet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport torch\nimport torch.nn as nn\nfrom torchvision import transforms, datasets\nimport torch.optim as optim\nfrom tqdm import tqdm\n\nfrom model import vgg\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),\n        \"val\": transforms.Compose([transforms.Resize((224, 224)),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"{} path does not exist.\".format(image_path)\n    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"train\"),\n                                         transform=data_transform[\"train\"])\n    train_num = len(train_dataset)\n\n    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\n    flower_list = train_dataset.class_to_idx\n    cla_dict = dict((val, key) for key, val in flower_list.items())\n    # write dict into json file\n    json_str = json.dumps(cla_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    batch_size = 32\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n 
                                              batch_size=batch_size, shuffle=True,\n                                               num_workers=nw)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform[\"val\"])\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=nw)\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    # test_data_iter = iter(validate_loader)\n    # test_image, test_label = test_data_iter.next()\n\n    model_name = \"vgg16\"\n    net = vgg(model_name=model_name, num_classes=5, init_weights=True)\n    net.to(device)\n    loss_function = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(net.parameters(), lr=0.0001)\n\n    epochs = 30\n    best_acc = 0.0\n    save_path = './{}Net.pth'.format(model_name)\n    train_steps = len(train_loader)\n    for epoch in range(epochs):\n        # train\n        net.train()\n        running_loss = 0.0\n        train_bar = tqdm(train_loader, file=sys.stdout)\n        for step, data in enumerate(train_bar):\n            images, labels = data\n            optimizer.zero_grad()\n            outputs = net(images.to(device))\n            loss = loss_function(outputs, labels.to(device))\n            loss.backward()\n            optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}\".format(epoch + 1,\n                                                                     epochs,\n                                                                     loss)\n\n        # validate\n        
net.eval()\n        acc = 0.0  # accumulate accurate number / epoch\n        with torch.no_grad():\n            val_bar = tqdm(validate_loader, file=sys.stdout)\n            for val_data in val_bar:\n                val_images, val_labels = val_data\n                outputs = net(val_images.to(device))\n                predict_y = torch.max(outputs, dim=1)[1]\n                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()\n\n        val_accurate = acc / val_num\n        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %\n              (epoch + 1, running_loss / train_steps, val_accurate))\n\n        if val_accurate > best_acc:\n            best_acc = val_accurate\n            torch.save(net.state_dict(), save_path)\n\n    print('Finished Training')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test4_googlenet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test4_googlenet/model.py",
    "content": "import torch.nn as nn\nimport torch\nimport torch.nn.functional as F\n\n\nclass GoogLeNet(nn.Module):\n    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):\n        super(GoogLeNet, self).__init__()\n        self.aux_logits = aux_logits\n\n        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)\n        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)\n\n        self.conv2 = BasicConv2d(64, 64, kernel_size=1)\n        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)\n        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)\n\n        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)\n        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)\n        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)\n\n        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)\n        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)\n        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)\n        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)\n        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)\n        self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)\n\n        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)\n        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)\n\n        if self.aux_logits:\n            self.aux1 = InceptionAux(512, num_classes)\n            self.aux2 = InceptionAux(528, num_classes)\n\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.dropout = nn.Dropout(0.4)\n        self.fc = nn.Linear(1024, num_classes)\n        if init_weights:\n            self._initialize_weights()\n\n    def forward(self, x):\n        # N x 3 x 224 x 224\n        x = self.conv1(x)\n        # N x 64 x 112 x 112\n        x = self.maxpool1(x)\n        # N x 64 x 56 x 56\n        x = self.conv2(x)\n        # N x 64 x 56 x 56\n        x = 
self.conv3(x)\n        # N x 192 x 56 x 56\n        x = self.maxpool2(x)\n\n        # N x 192 x 28 x 28\n        x = self.inception3a(x)\n        # N x 256 x 28 x 28\n        x = self.inception3b(x)\n        # N x 480 x 28 x 28\n        x = self.maxpool3(x)\n        # N x 480 x 14 x 14\n        x = self.inception4a(x)\n        # N x 512 x 14 x 14\n        if self.training and self.aux_logits:    # eval model lose this layer\n            aux1 = self.aux1(x)\n\n        x = self.inception4b(x)\n        # N x 512 x 14 x 14\n        x = self.inception4c(x)\n        # N x 512 x 14 x 14\n        x = self.inception4d(x)\n        # N x 528 x 14 x 14\n        if self.training and self.aux_logits:    # eval model lose this layer\n            aux2 = self.aux2(x)\n\n        x = self.inception4e(x)\n        # N x 832 x 14 x 14\n        x = self.maxpool4(x)\n        # N x 832 x 7 x 7\n        x = self.inception5a(x)\n        # N x 832 x 7 x 7\n        x = self.inception5b(x)\n        # N x 1024 x 7 x 7\n\n        x = self.avgpool(x)\n        # N x 1024 x 1 x 1\n        x = torch.flatten(x, 1)\n        # N x 1024\n        x = self.dropout(x)\n        x = self.fc(x)\n        # N x 1000 (num_classes)\n        if self.training and self.aux_logits:   # eval model lose this layer\n            return x, aux2, aux1\n        return x\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n\n\nclass Inception(nn.Module):\n    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):\n        super(Inception, self).__init__()\n\n        self.branch1 = BasicConv2d(in_channels, ch1x1, 
kernel_size=1)\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(in_channels, ch3x3red, kernel_size=1),\n            BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)   # 保证输出大小等于输入大小\n        )\n\n        self.branch3 = nn.Sequential(\n            BasicConv2d(in_channels, ch5x5red, kernel_size=1),\n            # 在官方的实现中，其实是3x3的kernel并不是5x5，这里我也懒得改了，具体可以参考下面的issue\n            # Please see https://github.com/pytorch/vision/issues/906 for details.\n            BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)   # 保证输出大小等于输入大小\n        )\n\n        self.branch4 = nn.Sequential(\n            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),\n            BasicConv2d(in_channels, pool_proj, kernel_size=1)\n        )\n\n    def forward(self, x):\n        branch1 = self.branch1(x)\n        branch2 = self.branch2(x)\n        branch3 = self.branch3(x)\n        branch4 = self.branch4(x)\n\n        outputs = [branch1, branch2, branch3, branch4]\n        return torch.cat(outputs, 1)\n\n\nclass InceptionAux(nn.Module):\n    def __init__(self, in_channels, num_classes):\n        super(InceptionAux, self).__init__()\n        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)\n        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)  # output[batch, 128, 4, 4]\n\n        self.fc1 = nn.Linear(2048, 1024)\n        self.fc2 = nn.Linear(1024, num_classes)\n\n    def forward(self, x):\n        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14\n        x = self.averagePool(x)\n        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4\n        x = self.conv(x)\n        # N x 128 x 4 x 4\n        x = torch.flatten(x, 1)\n        x = F.dropout(x, 0.5, training=self.training)\n        # N x 2048\n        x = F.relu(self.fc1(x), inplace=True)\n        x = F.dropout(x, 0.5, training=self.training)\n        # N x 1024\n        x = self.fc2(x)\n        # N x num_classes\n        return x\n\n\nclass BasicConv2d(nn.Module):\n    def __init__(self, 
in_channels, out_channels, **kwargs):\n        super(BasicConv2d, self).__init__()\n        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.relu(x)\n        return x\n"
  },
  {
    "path": "pytorch_classification/Test4_googlenet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import GoogLeNet\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize((224, 224)),\n         transforms.ToTensor(),\n         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = GoogLeNet(num_classes=5, aux_logits=False).to(device)\n\n    # load model weights\n    weights_path = \"./googleNet.pth\"\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    missing_keys, unexpected_keys = model.load_state_dict(torch.load(weights_path, map_location=device),\n                                                          strict=False)\n\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                           
                       predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test4_googlenet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport torch\nimport torch.nn as nn\nfrom torchvision import transforms, datasets\nimport torch.optim as optim\nfrom tqdm import tqdm\n\nfrom model import GoogLeNet\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),\n        \"val\": transforms.Compose([transforms.Resize((224, 224)),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"{} path does not exist.\".format(image_path)\n    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"train\"),\n                                         transform=data_transform[\"train\"])\n    train_num = len(train_dataset)\n\n    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\n    flower_list = train_dataset.class_to_idx\n    cla_dict = dict((val, key) for key, val in flower_list.items())\n    # write dict into json file\n    json_str = json.dumps(cla_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    batch_size = 32\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n\n    train_loader = 
torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size, shuffle=True,\n                                               num_workers=nw)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform[\"val\"])\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=nw)\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    # test_data_iter = iter(validate_loader)\n    # test_image, test_label = test_data_iter.next()\n\n    net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)\n    # 如果要使用官方的预训练权重，注意是将权重载入官方的模型，不是我们自己实现的模型\n    # 官方的模型中使用了bn层以及改了一些参数，不能混用\n    # import torchvision\n    # net = torchvision.models.googlenet(num_classes=5)\n    # model_dict = net.state_dict()\n    # # 预训练权重下载地址: https://download.pytorch.org/models/googlenet-1378be20.pth\n    # pretrain_model = torch.load(\"googlenet.pth\")\n    # del_list = [\"aux1.fc2.weight\", \"aux1.fc2.bias\",\n    #             \"aux2.fc2.weight\", \"aux2.fc2.bias\",\n    #             \"fc.weight\", \"fc.bias\"]\n    # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}\n    # model_dict.update(pretrain_dict)\n    # net.load_state_dict(model_dict)\n    net.to(device)\n    loss_function = nn.CrossEntropyLoss()\n    optimizer = optim.Adam(net.parameters(), lr=0.0003)\n\n    epochs = 30\n    best_acc = 0.0\n    save_path = './googleNet.pth'\n    train_steps = len(train_loader)\n    for epoch in range(epochs):\n        # train\n        net.train()\n        running_loss = 0.0\n        train_bar = 
tqdm(train_loader, file=sys.stdout)\n        for step, data in enumerate(train_bar):\n            images, labels = data\n            optimizer.zero_grad()\n            logits, aux_logits2, aux_logits1 = net(images.to(device))\n            loss0 = loss_function(logits, labels.to(device))\n            loss1 = loss_function(aux_logits1, labels.to(device))\n            loss2 = loss_function(aux_logits2, labels.to(device))\n            loss = loss0 + loss1 * 0.3 + loss2 * 0.3\n            loss.backward()\n            optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}\".format(epoch + 1,\n                                                                     epochs,\n                                                                     loss)\n\n        # validate\n        net.eval()\n        acc = 0.0  # accumulate accurate number / epoch\n        with torch.no_grad():\n            val_bar = tqdm(validate_loader, file=sys.stdout)\n            for val_data in val_bar:\n                val_images, val_labels = val_data\n                outputs = net(val_images.to(device))  # eval model only have last output layer\n                predict_y = torch.max(outputs, dim=1)[1]\n                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()\n\n        val_accurate = acc / val_num\n        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %\n              (epoch + 1, running_loss / train_steps, val_accurate))\n\n        if val_accurate > best_acc:\n            best_acc = val_accurate\n            torch.save(net.state_dict(), save_path)\n\n    print('Finished Training')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test5_resnet/README.md",
    "content": "## 文件结构：\n```\n  ├── model.py: ResNet模型搭建\n  ├── train.py: 训练脚本\n  ├── predict.py: 单张图像预测脚本\n  └── batch_predict.py: 批量图像预测脚本\n```"
  },
  {
    "path": "pytorch_classification/Test5_resnet/batch_predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\n\nfrom model import resnet34\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    # 指向需要遍历预测的图像文件夹\n    imgs_root = \"/data/imgs\"\n    assert os.path.exists(imgs_root), f\"file: '{imgs_root}' dose not exist.\"\n    # 读取指定文件夹下所有jpg图像路径\n    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(\".jpg\")]\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), f\"file: '{json_path}' dose not exist.\"\n\n    json_file = open(json_path, \"r\")\n    class_indict = json.load(json_file)\n\n    # create model\n    model = resnet34(num_classes=5).to(device)\n\n    # load model weights\n    weights_path = \"./resNet34.pth\"\n    assert os.path.exists(weights_path), f\"file: '{weights_path}' dose not exist.\"\n    model.load_state_dict(torch.load(weights_path, map_location=device))\n\n    # prediction\n    model.eval()\n    batch_size = 8  # 每次预测时将多少张图片打包成一个batch\n    with torch.no_grad():\n        for ids in range(0, len(img_path_list) // batch_size):\n            img_list = []\n            for img_path in img_path_list[ids * batch_size: (ids + 1) * batch_size]:\n                assert os.path.exists(img_path), f\"file: '{img_path}' dose not exist.\"\n                img = Image.open(img_path)\n                img = data_transform(img)\n                img_list.append(img)\n\n            # batch img\n            # 将img_list列表中的所有图像打包成一个batch\n            batch_img = torch.stack(img_list, dim=0)\n            # predict class\n            output = model(batch_img.to(device)).cpu()\n       
     predict = torch.softmax(output, dim=1)\n            probs, classes = torch.max(predict, dim=1)\n\n            for idx, (pro, cla) in enumerate(zip(probs, classes)):\n                print(\"image: {}  class: {}  prob: {:.3}\".format(img_path_list[ids * batch_size + idx],\n                                                                 class_indict[str(cla.numpy())],\n                                                                 pro.numpy()))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test5_resnet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test5_resnet/load_weights.py",
    "content": "import os\nimport torch\nimport torch.nn as nn\nfrom model import resnet34\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    # load pretrain weights\n    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    model_weight_path = \"./resnet34-pre.pth\"\n    assert os.path.exists(model_weight_path), \"file {} does not exist.\".format(model_weight_path)\n\n    # option1\n    net = resnet34()\n    net.load_state_dict(torch.load(model_weight_path, map_location=device))\n    # change fc layer structure\n    in_channel = net.fc.in_features\n    net.fc = nn.Linear(in_channel, 5)\n\n    # option2\n    # net = resnet34(num_classes=5)\n    # pre_weights = torch.load(model_weight_path, map_location=device)\n    # del_key = []\n    # for key, _ in pre_weights.items():\n    #     if \"fc\" in key:\n    #         del_key.append(key)\n    #\n    # for key in del_key:\n    #     del pre_weights[key]\n    #\n    # missing_keys, unexpected_keys = net.load_state_dict(pre_weights, strict=False)\n    # print(\"[missing_keys]:\", *missing_keys, sep=\"\\n\")\n    # print(\"[unexpected_keys]:\", *unexpected_keys, sep=\"\\n\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test5_resnet/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    \"\"\"\n    注意：原论文中，在虚线残差结构的主分支上，第一个1x1卷积层的步距是2，第二个3x3卷积层步距是1。\n    但在pytorch官方实现过程中是第一个1x1卷积层的步距是1，第二个3x3卷积层步距是2，\n    这么做的好处是能够在top1上提升大概0.5%的准确率。\n    可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch\n    \"\"\"\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None,\n                 groups=1, width_per_group=64):\n        super(Bottleneck, self).__init__()\n\n        width = int(out_channel * (width_per_group / 64.)) * groups\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(width)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, 
groups=groups,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(width)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self,\n                 block,\n                 blocks_num,\n                 num_classes=1000,\n                 include_top=True,\n                 groups=1,\n                 width_per_group=64):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.groups = groups\n        self.width_per_group = width_per_group\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, 
blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel,\n                            channel,\n                            downsample=downsample,\n                            stride=stride,\n                            groups=self.groups,\n                            width_per_group=self.width_per_group))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel,\n                                channel,\n                                groups=self.groups,\n                                width_per_group=self.width_per_group))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    return ResNet(BasicBlock, [3, 4, 6, 3], 
num_classes=num_classes, include_top=include_top)\n\n\ndef resnet50(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet50-19c8e357.pth\n    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnext50_32x4d(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth\n    groups = 32\n    width_per_group = 4\n    return ResNet(Bottleneck, [3, 4, 6, 3],\n                  num_classes=num_classes,\n                  include_top=include_top,\n                  groups=groups,\n                  width_per_group=width_per_group)\n\n\ndef resnext101_32x8d(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth\n    groups = 32\n    width_per_group = 8\n    return ResNet(Bottleneck, [3, 4, 23, 3],\n                  num_classes=num_classes,\n                  include_top=include_top,\n                  groups=groups,\n                  width_per_group=width_per_group)\n"
  },
  {
    "path": "pytorch_classification/Test5_resnet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import resnet34\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = resnet34(num_classes=5).to(device)\n\n    # load model weights\n    weights_path = \"./resNet34.pth\"\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location=device))\n\n    # prediction\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    
plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test5_resnet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torchvision import transforms, datasets\nfrom tqdm import tqdm\n\nfrom model import resnet34\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"{} path does not exist.\".format(image_path)\n    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"train\"),\n                                         transform=data_transform[\"train\"])\n    train_num = len(train_dataset)\n\n    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\n    flower_list = train_dataset.class_to_idx\n    cla_dict = dict((val, key) for key, val in flower_list.items())\n    # write dict into json file\n    json_str = json.dumps(cla_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    batch_size = 16\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every 
process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size, shuffle=True,\n                                               num_workers=nw)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform[\"val\"])\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=nw)\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n    \n    net = resnet34()\n    # load pretrain weights\n    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    model_weight_path = \"./resnet34-pre.pth\"\n    assert os.path.exists(model_weight_path), \"file {} does not exist.\".format(model_weight_path)\n    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))\n    # for param in net.parameters():\n    #     param.requires_grad = False\n\n    # change fc layer structure\n    in_channel = net.fc.in_features\n    net.fc = nn.Linear(in_channel, 5)\n    net.to(device)\n\n    # define loss function\n    loss_function = nn.CrossEntropyLoss()\n\n    # construct an optimizer\n    params = [p for p in net.parameters() if p.requires_grad]\n    optimizer = optim.Adam(params, lr=0.0001)\n\n    epochs = 3\n    best_acc = 0.0\n    save_path = './resNet34.pth'\n    train_steps = len(train_loader)\n    for epoch in range(epochs):\n        # train\n        net.train()\n        running_loss = 0.0\n        train_bar = tqdm(train_loader, file=sys.stdout)\n        for step, data in enumerate(train_bar):\n            images, labels = data\n            
optimizer.zero_grad()\n            logits = net(images.to(device))\n            loss = loss_function(logits, labels.to(device))\n            loss.backward()\n            optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}\".format(epoch + 1,\n                                                                     epochs,\n                                                                     loss)\n\n        # validate\n        net.eval()\n        acc = 0.0  # accumulate accurate number / epoch\n        with torch.no_grad():\n            val_bar = tqdm(validate_loader, file=sys.stdout)\n            for val_data in val_bar:\n                val_images, val_labels = val_data\n                outputs = net(val_images.to(device))\n                # loss = loss_function(outputs, test_labels)\n                predict_y = torch.max(outputs, dim=1)[1]\n                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()\n\n                val_bar.desc = \"valid epoch[{}/{}]\".format(epoch + 1,\n                                                           epochs)\n\n        val_accurate = acc / val_num\n        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %\n              (epoch + 1, running_loss / train_steps, val_accurate))\n\n        if val_accurate > best_acc:\n            best_acc = val_accurate\n            torch.save(net.state_dict(), save_path)\n\n    print('Finished Training')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test6_mobilenet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test6_mobilenet/model_v2.py",
    "content": "from torch import nn\nimport torch\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(nn.Sequential):\n    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):\n        padding = (kernel_size - 1) // 2\n        super(ConvBNReLU, self).__init__(\n            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),\n            nn.BatchNorm2d(out_channel),\n            nn.ReLU6(inplace=True)\n        )\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio):\n        super(InvertedResidual, self).__init__()\n        hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n\n        layers = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1))\n        layers.extend([\n            # 3x3 depthwise conv\n            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),\n            # 1x1 pointwise conv(linear)\n            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),\n            nn.BatchNorm2d(out_channel),\n        ])\n\n        self.conv = nn.Sequential(*layers)\n\n    def forward(self, x):\n        if self.use_shortcut:\n            return x + self.conv(x)\n        else:\n         
   return self.conv(x)\n\n\nclass MobileNetV2(nn.Module):\n    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):\n        super(MobileNetV2, self).__init__()\n        block = InvertedResidual\n        input_channel = _make_divisible(32 * alpha, round_nearest)\n        last_channel = _make_divisible(1280 * alpha, round_nearest)\n\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n\n        features = []\n        # conv1 layer\n        features.append(ConvBNReLU(3, input_channel, stride=2))\n        # building inverted residual blocks\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = _make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(block(input_channel, output_channel, stride, expand_ratio=t))\n                input_channel = output_channel\n        # building last several layers\n        features.append(ConvBNReLU(input_channel, last_channel, 1))\n        # combine feature layers\n        self.features = nn.Sequential(*features)\n\n        # building classifier\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.classifier = nn.Sequential(\n            nn.Dropout(0.2),\n            nn.Linear(last_channel, num_classes)\n        )\n\n        # weight initialization\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out')\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                
nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "pytorch_classification/Test6_mobilenet/model_v3.py",
    "content": "from typing import Callable, List, Optional\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom functools import partial\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNActivation(nn.Sequential):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None):\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.ReLU6\n        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,\n                                                         out_channels=out_planes,\n                                                         kernel_size=kernel_size,\n                                                         stride=stride,\n                                                         padding=padding,\n                                                         groups=groups,\n                                                         bias=False),\n                                               norm_layer(out_planes),\n                          
                     activation_layer(inplace=True))\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self, input_c: int, squeeze_factor: int = 4):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)\n        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)\n        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))\n        scale = self.fc1(scale)\n        scale = F.relu(scale, inplace=True)\n        scale = self.fc2(scale)\n        scale = F.hardsigmoid(scale, inplace=True)\n        return scale * x\n\n\nclass InvertedResidualConfig:\n    def __init__(self,\n                 input_c: int,\n                 kernel: int,\n                 expanded_c: int,\n                 out_c: int,\n                 use_se: bool,\n                 activation: str,\n                 stride: int,\n                 width_multi: float):\n        self.input_c = self.adjust_channels(input_c, width_multi)\n        self.kernel = kernel\n        self.expanded_c = self.adjust_channels(expanded_c, width_multi)\n        self.out_c = self.adjust_channels(out_c, width_multi)\n        self.use_se = use_se\n        self.use_hs = activation == \"HS\"  # whether using h-swish activation\n        self.stride = stride\n\n    @staticmethod\n    def adjust_channels(channels: int, width_multi: float):\n        return _make_divisible(channels * width_multi, 8)\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self,\n                 cnf: InvertedResidualConfig,\n                 norm_layer: Callable[..., nn.Module]):\n        super(InvertedResidual, self).__init__()\n\n        if cnf.stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)\n\n        layers: List[nn.Module] = []\n        activation_layer = nn.Hardswish if 
cnf.use_hs else nn.ReLU\n\n        # expand\n        if cnf.expanded_c != cnf.input_c:\n            layers.append(ConvBNActivation(cnf.input_c,\n                                           cnf.expanded_c,\n                                           kernel_size=1,\n                                           norm_layer=norm_layer,\n                                           activation_layer=activation_layer))\n\n        # depthwise\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.expanded_c,\n                                       kernel_size=cnf.kernel,\n                                       stride=cnf.stride,\n                                       groups=cnf.expanded_c,\n                                       norm_layer=norm_layer,\n                                       activation_layer=activation_layer))\n\n        if cnf.use_se:\n            layers.append(SqueezeExcitation(cnf.expanded_c))\n\n        # project\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.out_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Identity))\n\n        self.block = nn.Sequential(*layers)\n        self.out_channels = cnf.out_c\n        self.is_strided = cnf.stride > 1\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.block(x)\n        if self.use_res_connect:\n            result += x\n\n        return result\n\n\nclass MobileNetV3(nn.Module):\n    def __init__(self,\n                 inverted_residual_setting: List[InvertedResidualConfig],\n                 last_channel: int,\n                 num_classes: int = 1000,\n                 block: Optional[Callable[..., nn.Module]] = None,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None):\n        super(MobileNetV3, self).__init__()\n\n        if not 
inverted_residual_setting:\n            raise ValueError(\"The inverted_residual_setting should not be empty.\")\n        elif not (isinstance(inverted_residual_setting, List) and\n                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):\n            raise TypeError(\"The inverted_residual_setting should be List[InvertedResidualConfig]\")\n\n        if block is None:\n            block = InvertedResidual\n\n        if norm_layer is None:\n            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)\n\n        layers: List[nn.Module] = []\n\n        # building first layer\n        firstconv_output_c = inverted_residual_setting[0].input_c\n        layers.append(ConvBNActivation(3,\n                                       firstconv_output_c,\n                                       kernel_size=3,\n                                       stride=2,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        # building inverted residual blocks\n        for cnf in inverted_residual_setting:\n            layers.append(block(cnf, norm_layer))\n\n        # building last several layers\n        lastconv_input_c = inverted_residual_setting[-1].out_c\n        lastconv_output_c = 6 * lastconv_input_c\n        layers.append(ConvBNActivation(lastconv_input_c,\n                                       lastconv_output_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        self.features = nn.Sequential(*layers)\n        self.avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),\n                                        nn.Hardswish(inplace=True),\n                                        nn.Dropout(p=0.2, inplace=True),\n                        
                nn.Linear(last_channel, num_classes))\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef mobilenet_v3_large(num_classes: int = 1000,\n                       reduced_tail: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a large MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. 
It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride\n        bneck_conf(16, 3, 16, 16, False, \"RE\", 1),\n        bneck_conf(16, 3, 64, 24, False, \"RE\", 2),  # C1\n        bneck_conf(24, 3, 72, 24, False, \"RE\", 1),\n        bneck_conf(24, 5, 72, 40, True, \"RE\", 2),  # C2\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1),\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1),\n        bneck_conf(40, 3, 240, 80, False, \"HS\", 2),  # C3\n        bneck_conf(80, 3, 200, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1),\n        bneck_conf(80, 3, 480, 112, True, \"HS\", 1),\n        bneck_conf(112, 3, 672, 112, True, \"HS\", 1),\n        bneck_conf(112, 5, 672, 160 // reduce_divider, True, \"HS\", 2),  # C4\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1),\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1),\n    ]\n    last_channel = adjust_channels(1280 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n\n\ndef mobilenet_v3_small(num_classes: int = 1000,\n                       reduced_tail: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a small MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    
https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride\n        bneck_conf(16, 3, 16, 16, True, \"RE\", 2),  # C1\n        bneck_conf(16, 3, 72, 24, False, \"RE\", 2),  # C2\n        bneck_conf(24, 3, 88, 24, False, \"RE\", 1),\n        bneck_conf(24, 5, 96, 40, True, \"HS\", 2),  # C3\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1),\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1),\n        bneck_conf(40, 5, 120, 48, True, \"HS\", 1),\n        bneck_conf(48, 5, 144, 48, True, \"HS\", 1),\n        bneck_conf(48, 5, 288, 96 // reduce_divider, True, \"HS\", 2),  # C4\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1),\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1)\n    ]\n    last_channel = adjust_channels(1024 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n"
  },
  {
    "path": "pytorch_classification/Test6_mobilenet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model_v2 import MobileNetV2\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = MobileNetV2(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./MobileNetV2.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test6_mobilenet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torchvision import transforms, datasets\nfrom tqdm import tqdm\n\nfrom model_v2 import MobileNetV2\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    batch_size = 16\n    epochs = 5\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    assert os.path.exists(image_path), \"{} path does not exist.\".format(image_path)\n    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"train\"),\n                                         transform=data_transform[\"train\"])\n    train_num = len(train_dataset)\n\n    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}\n    flower_list = train_dataset.class_to_idx\n    cla_dict = dict((val, key) for key, val in flower_list.items())\n    # write dict into json file\n    json_str = json.dumps(cla_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} 
dataloader workers every process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size, shuffle=True,\n                                               num_workers=nw)\n\n    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, \"val\"),\n                                            transform=data_transform[\"val\"])\n    val_num = len(validate_dataset)\n    validate_loader = torch.utils.data.DataLoader(validate_dataset,\n                                                  batch_size=batch_size, shuffle=False,\n                                                  num_workers=nw)\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    # create model\n    net = MobileNetV2(num_classes=5)\n\n    # load pretrain weights\n    # download url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth\n    model_weight_path = \"./mobilenet_v2.pth\"\n    assert os.path.exists(model_weight_path), \"file {} dose not exist.\".format(model_weight_path)\n    pre_weights = torch.load(model_weight_path, map_location='cpu')\n\n    # delete classifier weights\n    pre_dict = {k: v for k, v in pre_weights.items() if net.state_dict()[k].numel() == v.numel()}\n    missing_keys, unexpected_keys = net.load_state_dict(pre_dict, strict=False)\n\n    # freeze features weights\n    for param in net.features.parameters():\n        param.requires_grad = False\n\n    net.to(device)\n\n    # define loss function\n    loss_function = nn.CrossEntropyLoss()\n\n    # construct an optimizer\n    params = [p for p in net.parameters() if p.requires_grad]\n    optimizer = optim.Adam(params, lr=0.0001)\n\n    best_acc = 0.0\n    save_path = './MobileNetV2.pth'\n    train_steps = len(train_loader)\n    for epoch in range(epochs):\n        # train\n        net.train()\n       
 running_loss = 0.0\n        train_bar = tqdm(train_loader, file=sys.stdout)\n        for step, data in enumerate(train_bar):\n            images, labels = data\n            optimizer.zero_grad()\n            logits = net(images.to(device))\n            loss = loss_function(logits, labels.to(device))\n            loss.backward()\n            optimizer.step()\n\n            # print statistics\n            running_loss += loss.item()\n\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}\".format(epoch + 1,\n                                                                     epochs,\n                                                                     loss)\n\n        # validate\n        net.eval()\n        acc = 0.0  # accumulate accurate number / epoch\n        with torch.no_grad():\n            val_bar = tqdm(validate_loader, file=sys.stdout)\n            for val_data in val_bar:\n                val_images, val_labels = val_data\n                outputs = net(val_images.to(device))\n                # loss = loss_function(outputs, test_labels)\n                predict_y = torch.max(outputs, dim=1)[1]\n                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()\n\n                val_bar.desc = \"valid epoch[{}/{}]\".format(epoch + 1,\n                                                           epochs)\n        val_accurate = acc / val_num\n        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %\n              (epoch + 1, running_loss / train_steps, val_accurate))\n\n        if val_accurate > best_acc:\n            best_acc = val_accurate\n            torch.save(net.state_dict(), save_path)\n\n    print('Finished Training')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/model.py",
    "content": "from typing import List, Callable\n\nimport torch\nfrom torch import Tensor\nimport torch.nn as nn\n\n\ndef channel_shuffle(x: Tensor, groups: int) -> Tensor:\n\n    batch_size, num_channels, height, width = x.size()\n    channels_per_group = num_channels // groups\n\n    # reshape\n    # [batch_size, num_channels, height, width] -> [batch_size, groups, channels_per_group, height, width]\n    x = x.view(batch_size, groups, channels_per_group, height, width)\n\n    x = torch.transpose(x, 1, 2).contiguous()\n\n    # flatten\n    x = x.view(batch_size, -1, height, width)\n\n    return x\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, input_c: int, output_c: int, stride: int):\n        super(InvertedResidual, self).__init__()\n\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n        self.stride = stride\n\n        assert output_c % 2 == 0\n        branch_features = output_c // 2\n        # 当stride为1时，input_channel应该是branch_features的两倍\n        # python中 '<<' 是位运算，可理解为计算×2的快速方法\n        assert (self.stride != 1) or (input_c == branch_features << 1)\n\n        if self.stride == 2:\n            self.branch1 = nn.Sequential(\n                self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1),\n                nn.BatchNorm2d(input_c),\n                nn.Conv2d(input_c, branch_features, kernel_size=1, stride=1, padding=0, bias=False),\n                nn.BatchNorm2d(branch_features),\n                nn.ReLU(inplace=True)\n            )\n        else:\n            self.branch1 = nn.Sequential()\n\n        self.branch2 = nn.Sequential(\n            nn.Conv2d(input_c if self.stride > 1 else branch_features, branch_features, kernel_size=1,\n                      stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True),\n            self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, 
padding=1),\n            nn.BatchNorm2d(branch_features),\n            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True)\n        )\n\n    @staticmethod\n    def depthwise_conv(input_c: int,\n                       output_c: int,\n                       kernel_s: int,\n                       stride: int = 1,\n                       padding: int = 0,\n                       bias: bool = False) -> nn.Conv2d:\n        return nn.Conv2d(in_channels=input_c, out_channels=output_c, kernel_size=kernel_s,\n                         stride=stride, padding=padding, bias=bias, groups=input_c)\n\n    def forward(self, x: Tensor) -> Tensor:\n        if self.stride == 1:\n            x1, x2 = x.chunk(2, dim=1)\n            out = torch.cat((x1, self.branch2(x2)), dim=1)\n        else:\n            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)\n\n        out = channel_shuffle(out, 2)\n\n        return out\n\n\nclass ShuffleNetV2(nn.Module):\n    def __init__(self,\n                 stages_repeats: List[int],\n                 stages_out_channels: List[int],\n                 num_classes: int = 1000,\n                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):\n        super(ShuffleNetV2, self).__init__()\n\n        if len(stages_repeats) != 3:\n            raise ValueError(\"expected stages_repeats as list of 3 positive ints\")\n        if len(stages_out_channels) != 5:\n            raise ValueError(\"expected stages_out_channels as list of 5 positive ints\")\n        self._stage_out_channels = stages_out_channels\n\n        # input RGB image\n        input_channels = 3\n        output_channels = self._stage_out_channels[0]\n\n        self.conv1 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, kernel_size=3, stride=2, padding=1, bias=False),\n            nn.BatchNorm2d(output_channels),\n            
nn.ReLU(inplace=True)\n        )\n        input_channels = output_channels\n\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n        # Static annotations for mypy\n        self.stage2: nn.Sequential\n        self.stage3: nn.Sequential\n        self.stage4: nn.Sequential\n\n        stage_names = [\"stage{}\".format(i) for i in [2, 3, 4]]\n        for name, repeats, output_channels in zip(stage_names, stages_repeats,\n                                                  self._stage_out_channels[1:]):\n            seq = [inverted_residual(input_channels, output_channels, 2)]\n            for i in range(repeats - 1):\n                seq.append(inverted_residual(output_channels, output_channels, 1))\n            setattr(self, name, nn.Sequential(*seq))\n            input_channels = output_channels\n\n        output_channels = self._stage_out_channels[-1]\n        self.conv5 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(output_channels),\n            nn.ReLU(inplace=True)\n        )\n\n        self.fc = nn.Linear(output_channels, num_classes)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        # See note [TorchScript super()]\n        x = self.conv1(x)\n        x = self.maxpool(x)\n        x = self.stage2(x)\n        x = self.stage3(x)\n        x = self.stage4(x)\n        x = self.conv5(x)\n        x = x.mean([2, 3])  # global pool\n        x = self.fc(x)\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef shufflenet_v2_x0_5(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 0.5x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth\n\n    :param num_classes:\n    :return:\n  
  \"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 48, 96, 192, 1024],\n                         num_classes=num_classes)\n\n    return model\n\n\ndef shufflenet_v2_x1_0(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 1.0x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth\n\n    :param num_classes:\n    :return:\n    \"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 116, 232, 464, 1024],\n                         num_classes=num_classes)\n\n    return model\n\n\ndef shufflenet_v2_x1_5(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 1.5x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x1_5-3c479a10.pth\n\n    :param num_classes:\n    :return:\n    \"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 176, 352, 704, 1024],\n                         num_classes=num_classes)\n\n    return model\n\n\ndef shufflenet_v2_x2_0(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 2.0x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x2_0-8be3c8ee.pth\n\n    :param num_classes:\n    :return:\n    \"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 244, 488, 976, 2048],\n                         num_classes=num_classes)\n\n    return model\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import shufflenet_v2_x1_0\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = shufflenet_v2_x1_0(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-29.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import shufflenet_v2_x1_0\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of 
workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device)\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            weights_dict = torch.load(args.weights, map_location=device)\n            load_weights_dict = {k: v for k, v in weights_dict.items()\n                                 if model.state_dict()[k].numel() == v.numel()}\n            print(model.load_state_dict(load_weights_dict, strict=False))\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                para.requires_grad_(False)\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = 
lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.01)\n    parser.add_argument('--lrf', type=float, default=0.1)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # shufflenetv2_x1.0 官方权重下载地址\n    # https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth\n    parser.add_argument('--weights', type=str, default='./shufflenetv2_x1.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 
0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/Test7_shufflenet/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 验证样本总个数\n    total_num = len(data_loader.dataset)\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    return sum_num.item() / total_num\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/model.py",
    "content": "import re\nfrom typing import Any, List, Tuple\nfrom collections import OrderedDict\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as cp\nfrom torch import Tensor\n\n\nclass _DenseLayer(nn.Module):\n    def __init__(self,\n                 input_c: int,\n                 growth_rate: int,\n                 bn_size: int,\n                 drop_rate: float,\n                 memory_efficient: bool = False):\n        super(_DenseLayer, self).__init__()\n\n        self.add_module(\"norm1\", nn.BatchNorm2d(input_c))\n        self.add_module(\"relu1\", nn.ReLU(inplace=True))\n        self.add_module(\"conv1\", nn.Conv2d(in_channels=input_c,\n                                           out_channels=bn_size * growth_rate,\n                                           kernel_size=1,\n                                           stride=1,\n                                           bias=False))\n        self.add_module(\"norm2\", nn.BatchNorm2d(bn_size * growth_rate))\n        self.add_module(\"relu2\", nn.ReLU(inplace=True))\n        self.add_module(\"conv2\", nn.Conv2d(bn_size * growth_rate,\n                                           growth_rate,\n                                           kernel_size=3,\n                                           stride=1,\n                                           padding=1,\n                                           bias=False))\n        self.drop_rate = drop_rate\n        self.memory_efficient = memory_efficient\n\n    def bn_function(self, inputs: List[Tensor]) -> Tensor:\n        concat_features = torch.cat(inputs, 1)\n        bottleneck_output = self.conv1(self.relu1(self.norm1(concat_features)))\n        return bottleneck_output\n\n    @staticmethod\n    def any_requires_grad(inputs: List[Tensor]) -> bool:\n        for tensor in inputs:\n            if tensor.requires_grad:\n                return True\n\n        return False\n\n    @torch.jit.unused\n    def 
call_checkpoint_bottleneck(self, inputs: List[Tensor]) -> Tensor:\n        def closure(*inp):\n            return self.bn_function(inp)\n\n        return cp.checkpoint(closure, *inputs)\n\n    def forward(self, inputs: Tensor) -> Tensor:\n        if isinstance(inputs, Tensor):\n            prev_features = [inputs]\n        else:\n            prev_features = inputs\n\n        if self.memory_efficient and self.any_requires_grad(prev_features):\n            if torch.jit.is_scripting():\n                raise Exception(\"memory efficient not supported in JIT\")\n\n            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)\n        else:\n            bottleneck_output = self.bn_function(prev_features)\n\n        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))\n        if self.drop_rate > 0:\n            new_features = F.dropout(new_features,\n                                     p=self.drop_rate,\n                                     training=self.training)\n\n        return new_features\n\n\nclass _DenseBlock(nn.ModuleDict):\n    _version = 2\n\n    def __init__(self,\n                 num_layers: int,\n                 input_c: int,\n                 bn_size: int,\n                 growth_rate: int,\n                 drop_rate: float,\n                 memory_efficient: bool = False):\n        super(_DenseBlock, self).__init__()\n        for i in range(num_layers):\n            layer = _DenseLayer(input_c + i * growth_rate,\n                                growth_rate=growth_rate,\n                                bn_size=bn_size,\n                                drop_rate=drop_rate,\n                                memory_efficient=memory_efficient)\n            self.add_module(\"denselayer%d\" % (i + 1), layer)\n\n    def forward(self, init_features: Tensor) -> Tensor:\n        features = [init_features]\n        for name, layer in self.items():\n            new_features = layer(features)\n            
features.append(new_features)\n        return torch.cat(features, 1)\n\n\nclass _Transition(nn.Sequential):\n    def __init__(self,\n                 input_c: int,\n                 output_c: int):\n        super(_Transition, self).__init__()\n        self.add_module(\"norm\", nn.BatchNorm2d(input_c))\n        self.add_module(\"relu\", nn.ReLU(inplace=True))\n        self.add_module(\"conv\", nn.Conv2d(input_c,\n                                          output_c,\n                                          kernel_size=1,\n                                          stride=1,\n                                          bias=False))\n        self.add_module(\"pool\", nn.AvgPool2d(kernel_size=2, stride=2))\n\n\nclass DenseNet(nn.Module):\n    \"\"\"\n    Densenet-BC model class for imagenet\n\n    Args:\n        growth_rate (int) - how many filters to add each layer (`k` in paper)\n        block_config (list of 4 ints) - how many layers in each pooling block\n        num_init_features (int) - the number of filters to learn in the first convolution layer\n        bn_size (int) - multiplicative factor for number of bottle neck layers\n          (i.e. bn_size * k features in the bottleneck layer)\n        drop_rate (float) - dropout rate after each dense layer\n        num_classes (int) - number of classification classes\n        memory_efficient (bool) - If True, uses checkpointing. 
Much more memory efficient\n    \"\"\"\n\n    def __init__(self,\n                 growth_rate: int = 32,\n                 block_config: Tuple[int, int, int, int] = (6, 12, 24, 16),\n                 num_init_features: int = 64,\n                 bn_size: int = 4,\n                 drop_rate: float = 0,\n                 num_classes: int = 1000,\n                 memory_efficient: bool = False):\n        super(DenseNet, self).__init__()\n\n        # first conv+bn+relu+pool\n        self.features = nn.Sequential(OrderedDict([\n            (\"conv0\", nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),\n            (\"norm0\", nn.BatchNorm2d(num_init_features)),\n            (\"relu0\", nn.ReLU(inplace=True)),\n            (\"pool0\", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),\n        ]))\n\n        # each dense block\n        num_features = num_init_features\n        for i, num_layers in enumerate(block_config):\n            block = _DenseBlock(num_layers=num_layers,\n                                input_c=num_features,\n                                bn_size=bn_size,\n                                growth_rate=growth_rate,\n                                drop_rate=drop_rate,\n                                memory_efficient=memory_efficient)\n            self.features.add_module(\"denseblock%d\" % (i + 1), block)\n            num_features = num_features + num_layers * growth_rate\n\n            if i != len(block_config) - 1:\n                trans = _Transition(input_c=num_features,\n                                    output_c=num_features // 2)\n                self.features.add_module(\"transition%d\" % (i + 1), trans)\n                num_features = num_features // 2\n\n        # finnal batch norm\n        self.features.add_module(\"norm5\", nn.BatchNorm2d(num_features))\n\n        # fc layer\n        self.classifier = nn.Linear(num_features, num_classes)\n\n        # init weights\n        for m in self.modules():\n   
         if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.constant_(m.bias, 0)\n\n    def forward(self, x: Tensor) -> Tensor:\n        features = self.features(x)\n        out = F.relu(features, inplace=True)\n        out = F.adaptive_avg_pool2d(out, (1, 1))\n        out = torch.flatten(out, 1)\n        out = self.classifier(out)\n        return out\n\n\ndef densenet121(**kwargs: Any) -> DenseNet:\n    # Top-1 error: 25.35%\n    # 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth'\n    return DenseNet(growth_rate=32,\n                    block_config=(6, 12, 24, 16),\n                    num_init_features=64,\n                    **kwargs)\n\n\ndef densenet169(**kwargs: Any) -> DenseNet:\n    # Top-1 error: 24.00%\n    # 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth'\n    return DenseNet(growth_rate=32,\n                    block_config=(6, 12, 32, 32),\n                    num_init_features=64,\n                    **kwargs)\n\n\ndef densenet201(**kwargs: Any) -> DenseNet:\n    # Top-1 error: 22.80%\n    # 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth'\n    return DenseNet(growth_rate=32,\n                    block_config=(6, 12, 48, 32),\n                    num_init_features=64,\n                    **kwargs)\n\n\ndef densenet161(**kwargs: Any) -> DenseNet:\n    # Top-1 error: 22.35%\n    # 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth'\n    return DenseNet(growth_rate=48,\n                    block_config=(6, 12, 36, 24),\n                    num_init_features=96,\n                    **kwargs)\n\n\ndef load_state_dict(model: nn.Module, weights_path: str) -> None:\n    # '.'s are no longer allowed in 
module names, but previous _DenseLayer\n    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.\n    # They are also in the checkpoints in model_urls. This pattern is used\n    # to find such keys.\n    pattern = re.compile(\n        r'^(.*denselayer\\d+\\.(?:norm|relu|conv))\\.((?:[12])\\.(?:weight|bias|running_mean|running_var))$')\n\n    state_dict = torch.load(weights_path)\n\n    num_classes = model.classifier.out_features\n    load_fc = num_classes == 1000\n\n    for key in list(state_dict.keys()):\n        if load_fc is False:\n            if \"classifier\" in key:\n                del state_dict[key]\n\n        res = pattern.match(key)\n        if res:\n            new_key = res.group(1) + res.group(2)\n            state_dict[new_key] = state_dict[key]\n            del state_dict[key]\n    model.load_state_dict(state_dict, strict=load_fc)\n    print(\"successfully load pretrain-weights.\")\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import densenet121\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = densenet121(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-3.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import densenet121, load_state_dict\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number 
of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = densenet121(num_classes=args.num_classes).to(device)\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            load_state_dict(model, args.weights)\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"classifier\" not in name:\n                para.requires_grad_(False)\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4, nesterov=True)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    
data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.1)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # densenet121 官方权重下载地址\n    # https://download.pytorch.org/models/densenet121-a639ec97.pth\n    parser.add_argument('--weights', type=str, default='densenet121.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/Test8_densenet/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 验证样本总个数\n    total_num = len(data_loader.dataset)\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    return sum_num.item() / total_num\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，根据自己使用的模型下载对应预训练权重: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw  密码: 090i\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/model.py",
    "content": "import math\nimport copy\nfrom functools import partial\nfrom collections import OrderedDict\nfrom typing import Optional, Callable\n\nimport torch\nimport torch.nn as nn\nfrom torch import Tensor\nfrom torch.nn import functional as F\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n\n    This function is taken from the rwightman.\n    It can be seen here:\n    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140\n    \"\"\"\n    if drop_prob == 0. 
or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass ConvBNActivation(nn.Sequential):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None):\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.SiLU  # alias Swish  (torch>=1.7)\n\n        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,\n                                                         out_channels=out_planes,\n                                                         kernel_size=kernel_size,\n                                                         stride=stride,\n                                                         padding=padding,\n                                                         groups=groups,\n                                                         bias=False),\n                                      
         norm_layer(out_planes),\n                                               activation_layer())\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self,\n                 input_c: int,   # block input channel\n                 expand_c: int,  # block expand channel\n                 squeeze_factor: int = 4):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = input_c // squeeze_factor\n        self.fc1 = nn.Conv2d(expand_c, squeeze_c, 1)\n        self.ac1 = nn.SiLU()  # alias Swish\n        self.fc2 = nn.Conv2d(squeeze_c, expand_c, 1)\n        self.ac2 = nn.Sigmoid()\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))\n        scale = self.fc1(scale)\n        scale = self.ac1(scale)\n        scale = self.fc2(scale)\n        scale = self.ac2(scale)\n        return scale * x\n\n\nclass InvertedResidualConfig:\n    # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate\n    def __init__(self,\n                 kernel: int,          # 3 or 5\n                 input_c: int,\n                 out_c: int,\n                 expanded_ratio: int,  # 1 or 6\n                 stride: int,          # 1 or 2\n                 use_se: bool,         # True\n                 drop_rate: float,\n                 index: str,           # 1a, 2a, 2b, ...\n                 width_coefficient: float):\n        self.input_c = self.adjust_channels(input_c, width_coefficient)\n        self.kernel = kernel\n        self.expanded_c = self.input_c * expanded_ratio\n        self.out_c = self.adjust_channels(out_c, width_coefficient)\n        self.use_se = use_se\n        self.stride = stride\n        self.drop_rate = drop_rate\n        self.index = index\n\n    @staticmethod\n    def adjust_channels(channels: int, width_coefficient: float):\n        return _make_divisible(channels * width_coefficient, 8)\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self,\n          
       cnf: InvertedResidualConfig,\n                 norm_layer: Callable[..., nn.Module]):\n        super(InvertedResidual, self).__init__()\n\n        if cnf.stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)\n\n        layers = OrderedDict()\n        activation_layer = nn.SiLU  # alias Swish\n\n        # expand\n        if cnf.expanded_c != cnf.input_c:\n            layers.update({\"expand_conv\": ConvBNActivation(cnf.input_c,\n                                                           cnf.expanded_c,\n                                                           kernel_size=1,\n                                                           norm_layer=norm_layer,\n                                                           activation_layer=activation_layer)})\n\n        # depthwise\n        layers.update({\"dwconv\": ConvBNActivation(cnf.expanded_c,\n                                                  cnf.expanded_c,\n                                                  kernel_size=cnf.kernel,\n                                                  stride=cnf.stride,\n                                                  groups=cnf.expanded_c,\n                                                  norm_layer=norm_layer,\n                                                  activation_layer=activation_layer)})\n\n        if cnf.use_se:\n            layers.update({\"se\": SqueezeExcitation(cnf.input_c,\n                                                   cnf.expanded_c)})\n\n        # project\n        layers.update({\"project_conv\": ConvBNActivation(cnf.expanded_c,\n                                                        cnf.out_c,\n                                                        kernel_size=1,\n                                                        norm_layer=norm_layer,\n                                                        activation_layer=nn.Identity)})\n\n        
self.block = nn.Sequential(layers)\n        self.out_channels = cnf.out_c\n        self.is_strided = cnf.stride > 1\n\n        # 只有在使用shortcut连接时才使用dropout层\n        if self.use_res_connect and cnf.drop_rate > 0:\n            self.dropout = DropPath(cnf.drop_rate)\n        else:\n            self.dropout = nn.Identity()\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.block(x)\n        result = self.dropout(result)\n        if self.use_res_connect:\n            result += x\n\n        return result\n\n\nclass EfficientNet(nn.Module):\n    def __init__(self,\n                 width_coefficient: float,\n                 depth_coefficient: float,\n                 num_classes: int = 1000,\n                 dropout_rate: float = 0.2,\n                 drop_connect_rate: float = 0.2,\n                 block: Optional[Callable[..., nn.Module]] = None,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None\n                 ):\n        super(EfficientNet, self).__init__()\n\n        # kernel_size, in_channel, out_channel, exp_ratio, strides, use_SE, drop_connect_rate, repeats\n        default_cnf = [[3, 32, 16, 1, 1, True, drop_connect_rate, 1],\n                       [3, 16, 24, 6, 2, True, drop_connect_rate, 2],\n                       [5, 24, 40, 6, 2, True, drop_connect_rate, 2],\n                       [3, 40, 80, 6, 2, True, drop_connect_rate, 3],\n                       [5, 80, 112, 6, 1, True, drop_connect_rate, 3],\n                       [5, 112, 192, 6, 2, True, drop_connect_rate, 4],\n                       [3, 192, 320, 6, 1, True, drop_connect_rate, 1]]\n\n        def round_repeats(repeats):\n            \"\"\"Round number of repeats based on depth multiplier.\"\"\"\n            return int(math.ceil(depth_coefficient * repeats))\n\n        if block is None:\n            block = InvertedResidual\n\n        if norm_layer is None:\n            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)\n\n        
adjust_channels = partial(InvertedResidualConfig.adjust_channels,\n                                  width_coefficient=width_coefficient)\n\n        # build inverted_residual_setting\n        bneck_conf = partial(InvertedResidualConfig,\n                             width_coefficient=width_coefficient)\n\n        b = 0\n        num_blocks = float(sum(round_repeats(i[-1]) for i in default_cnf))\n        inverted_residual_setting = []\n        for stage, args in enumerate(default_cnf):\n            cnf = copy.copy(args)\n            for i in range(round_repeats(cnf.pop(-1))):\n                if i > 0:\n                    # strides equal 1 except first cnf\n                    cnf[-3] = 1  # strides\n                    cnf[1] = cnf[2]  # input_channel equal output_channel\n\n                cnf[-1] = args[-2] * b / num_blocks  # update dropout ratio\n                index = str(stage + 1) + chr(i + 97)  # 1a, 2a, 2b, ...\n                inverted_residual_setting.append(bneck_conf(*cnf, index))\n                b += 1\n\n        # create layers\n        layers = OrderedDict()\n\n        # first conv\n        layers.update({\"stem_conv\": ConvBNActivation(in_planes=3,\n                                                     out_planes=adjust_channels(32),\n                                                     kernel_size=3,\n                                                     stride=2,\n                                                     norm_layer=norm_layer)})\n\n        # building inverted residual blocks\n        for cnf in inverted_residual_setting:\n            layers.update({cnf.index: block(cnf, norm_layer)})\n\n        # build top\n        last_conv_input_c = inverted_residual_setting[-1].out_c\n        last_conv_output_c = adjust_channels(1280)\n        layers.update({\"top\": ConvBNActivation(in_planes=last_conv_input_c,\n                                               out_planes=last_conv_output_c,\n                                               
kernel_size=1,\n                                               norm_layer=norm_layer)})\n\n        self.features = nn.Sequential(layers)\n        self.avgpool = nn.AdaptiveAvgPool2d(1)\n\n        classifier = []\n        if dropout_rate > 0:\n            classifier.append(nn.Dropout(p=dropout_rate, inplace=True))\n        classifier.append(nn.Linear(last_conv_output_c, num_classes))\n        self.classifier = nn.Sequential(*classifier)\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef efficientnet_b0(num_classes=1000):\n    # input image size 224x224\n    return EfficientNet(width_coefficient=1.0,\n                        depth_coefficient=1.0,\n                        dropout_rate=0.2,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b1(num_classes=1000):\n    # input image size 240x240\n    return EfficientNet(width_coefficient=1.0,\n                        depth_coefficient=1.1,\n                        dropout_rate=0.2,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b2(num_classes=1000):\n    # input image size 260x260\n    return EfficientNet(width_coefficient=1.1,\n                        depth_coefficient=1.2,\n                        dropout_rate=0.3,\n        
                num_classes=num_classes)\n\n\ndef efficientnet_b3(num_classes=1000):\n    # input image size 300x300\n    return EfficientNet(width_coefficient=1.2,\n                        depth_coefficient=1.4,\n                        dropout_rate=0.3,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b4(num_classes=1000):\n    # input image size 380x380\n    return EfficientNet(width_coefficient=1.4,\n                        depth_coefficient=1.8,\n                        dropout_rate=0.4,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b5(num_classes=1000):\n    # input image size 456x456\n    return EfficientNet(width_coefficient=1.6,\n                        depth_coefficient=2.2,\n                        dropout_rate=0.4,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b6(num_classes=1000):\n    # input image size 528x528\n    return EfficientNet(width_coefficient=1.8,\n                        depth_coefficient=2.6,\n                        dropout_rate=0.5,\n                        num_classes=num_classes)\n\n\ndef efficientnet_b7(num_classes=1000):\n    # input image size 600x600\n    return EfficientNet(width_coefficient=2.0,\n                        depth_coefficient=3.1,\n                        dropout_rate=0.5,\n                        num_classes=num_classes)\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import efficientnet_b0 as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    img_size = {\"B0\": 224,\n                \"B1\": 240,\n                \"B2\": 260,\n                \"B3\": 300,\n                \"B4\": 380,\n                \"B5\": 456,\n                \"B6\": 528,\n                \"B7\": 600}\n    num_model = \"B0\"\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(img_size[num_model]),\n         transforms.CenterCrop(img_size[num_model]),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-29.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n 
   plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/requirements.txt",
    "content": "numpy\nmatplotlib\ntqdm==4.56.0\ntorch>=1.7.1\ntorchvision>=0.8.2\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import efficientnet_b0 as create_model\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = {\"B0\": 224,\n                \"B1\": 240,\n                \"B2\": 260,\n                \"B3\": 300,\n                \"B4\": 380,\n                \"B5\": 456,\n                \"B6\": 528,\n                \"B7\": 600}\n    num_model = \"B0\"\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(img_size[num_model]),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(img_size[num_model]),\n                                   transforms.CenterCrop(img_size[num_model]),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              
transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = create_model(num_classes=args.num_classes).to(device)\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            weights_dict = torch.load(args.weights, map_location=device)\n            load_weights_dict = {k: v for k, v in weights_dict.items()\n                                 if model.state_dict()[k].numel() == v.numel()}\n            print(model.load_state_dict(load_weights_dict, strict=False))\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后一个卷积层和全连接层外，其他权重全部冻结\n            if (\"features.top\" not in name) and 
(\"classifier\" not in name):\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=1E-4)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.01)\n    parser.add_argument('--lrf', type=float, default=0.01)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        
default=\"/data/flower_photos\")\n\n    # download model weights\n    # 链接: https://pan.baidu.com/s/1ouX0UmjCsmSx3ZrqXbowjw  密码: 090i\n    parser.add_argument('--weights', type=str, default='./efficientnetb0.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/trans_weights_to_pytorch.py",
    "content": "import numpy as np\nimport torch\nimport tensorflow as tf\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    # save pytorch weights path\n    save_path = \"./efficientnetb0.pth\"\n\n    # create keras model and download weights\n    # EfficientNetB0, EfficientNetB1, EfficientNetB2, ...\n    m = tf.keras.applications.EfficientNetB0()\n\n    weights_dict = dict()\n    weights = m.weights[3:]  # delete norm weights\n    for weight in weights:\n        name = weight.name\n        data = weight.numpy()\n\n        if \"stem_conv/kernel:0\" == name:\n            torch_name = \"features.stem_conv.0.weight\"\n            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)\n        elif \"stem_bn/gamma:0\" == name:\n            torch_name = \"features.stem_conv.1.weight\"\n            weights_dict[torch_name] = data\n        elif \"stem_bn/beta:0\" == name:\n            torch_name = \"features.stem_conv.1.bias\"\n            weights_dict[torch_name] = data\n        elif \"stem_bn/moving_mean:0\" == name:\n            torch_name = \"features.stem_conv.1.running_mean\"\n            weights_dict[torch_name] = data\n        elif \"stem_bn/moving_variance:0\" == name:\n            torch_name = \"features.stem_conv.1.running_var\"\n            weights_dict[torch_name] = data\n        elif \"block\" in name:\n            name = name[5:]  # delete \"block\" word\n            block_index = name[:2]  # 1a, 2a, ...\n            name = name[3:]  # delete block_index and \"_\"\n            torch_prefix = \"features.{}.block.\".format(block_index)\n\n            trans_dict = {\"expand_conv/kernel:0\": \"expand_conv.0.weight\",\n                          \"expand_bn/gamma:0\": \"expand_conv.1.weight\",\n                          \"expand_bn/beta:0\": \"expand_conv.1.bias\",\n                          \"expand_bn/moving_mean:0\": \"expand_conv.1.running_mean\",\n                          
\"expand_bn/moving_variance:0\": \"expand_conv.1.running_var\",\n                          \"dwconv/depthwise_kernel:0\": \"dwconv.0.weight\",\n                          \"bn/gamma:0\": \"dwconv.1.weight\",\n                          \"bn/beta:0\": \"dwconv.1.bias\",\n                          \"bn/moving_mean:0\": \"dwconv.1.running_mean\",\n                          \"bn/moving_variance:0\": \"dwconv.1.running_var\",\n                          \"se_reduce/kernel:0\": \"se.fc1.weight\",\n                          \"se_reduce/bias:0\": \"se.fc1.bias\",\n                          \"se_expand/kernel:0\": \"se.fc2.weight\",\n                          \"se_expand/bias:0\": \"se.fc2.bias\",\n                          \"project_conv/kernel:0\": \"project_conv.0.weight\",\n                          \"project_bn/gamma:0\": \"project_conv.1.weight\",\n                          \"project_bn/beta:0\": \"project_conv.1.bias\",\n                          \"project_bn/moving_mean:0\": \"project_conv.1.running_mean\",\n                          \"project_bn/moving_variance:0\": \"project_conv.1.running_var\"}\n\n            assert name in trans_dict, \"key '{}' not in trans_dict\".format(name)\n            torch_postfix = trans_dict[name]\n            torch_name = torch_prefix + torch_postfix\n            if torch_postfix in [\"expand_conv.0.weight\", \"se.fc1.weight\", \"se.fc2.weight\", \"project_conv.0.weight\"]:\n                data = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)\n            elif torch_postfix == \"dwconv.0.weight\":\n                data = np.transpose(data, (2, 3, 0, 1)).astype(np.float32)\n            weights_dict[torch_name] = data\n        elif \"top_conv/kernel:0\" == name:\n            torch_name = \"features.top.0.weight\"\n            weights_dict[torch_name] = np.transpose(data, (3, 2, 0, 1)).astype(np.float32)\n        elif \"top_bn/gamma:0\" == name:\n            torch_name = \"features.top.1.weight\"\n            weights_dict[torch_name] 
= data\n        elif \"top_bn/beta:0\" == name:\n            torch_name = \"features.top.1.bias\"\n            weights_dict[torch_name] = data\n        elif \"top_bn/moving_mean:0\" == name:\n            torch_name = \"features.top.1.running_mean\"\n            weights_dict[torch_name] = data\n        elif \"top_bn/moving_variance:0\" == name:\n            torch_name = \"features.top.1.running_var\"\n            weights_dict[torch_name] = data\n        elif \"predictions/kernel:0\" == name:\n            torch_name = \"classifier.1.weight\"\n            weights_dict[torch_name] = np.transpose(data, (1, 0)).astype(np.float32)\n        elif \"predictions/bias:0\" == name:\n            torch_name = \"classifier.1.bias\"\n            weights_dict[torch_name] = data\n        else:\n            raise KeyError(\"no match key '{}'\".format(name))\n\n    for k, v in weights_dict.items():\n        weights_dict[k] = torch.as_tensor(v)\n\n    torch.save(weights_dict, save_path)\n    print(\"Conversion complete.\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/Test9_efficientNet/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 验证样本总个数\n    total_num = len(data_loader.dataset)\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    return sum_num.item() / total_num\n"
  },
  {
    "path": "pytorch_classification/analyze_weights_featuremap/alexnet_model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass AlexNet(nn.Module):\n    def __init__(self, num_classes=1000, init_weights=False):\n        super(AlexNet, self).__init__()\n        self.features = nn.Sequential(\n            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]\n            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]\n            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]\n            nn.ReLU(inplace=True),\n            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]\n        )\n        self.classifier = nn.Sequential(\n            nn.Dropout(p=0.5),\n            nn.Linear(128 * 6 * 6, 2048),\n            nn.ReLU(inplace=True),\n            nn.Dropout(p=0.5),\n            nn.Linear(2048, 2048),\n            nn.ReLU(inplace=True),\n            nn.Linear(2048, num_classes),\n        )\n        if init_weights:\n            self._initialize_weights()\n\n    def forward(self, x):\n        outputs = []\n        for name, module in self.features.named_children():\n            x = module(x)\n            if name in [\"0\", \"3\", \"6\"]:\n                outputs.append(x)\n\n        return outputs\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n           
     if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n"
  },
  {
    "path": "pytorch_classification/analyze_weights_featuremap/analyze_feature_map.py",
    "content": "import torch\nfrom alexnet_model import AlexNet\nfrom resnet_model import resnet34\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom PIL import Image\nfrom torchvision import transforms\n\ndata_transform = transforms.Compose(\n    [transforms.Resize((224, 224)),\n     transforms.ToTensor(),\n     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n\n# data_transform = transforms.Compose(\n#     [transforms.Resize(256),\n#      transforms.CenterCrop(224),\n#      transforms.ToTensor(),\n#      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n# create model\nmodel = AlexNet(num_classes=5)\n# model = resnet34(num_classes=5)\n# load model weights\nmodel_weight_path = \"./AlexNet.pth\"  # \"./resNet34.pth\"\nmodel.load_state_dict(torch.load(model_weight_path))\nprint(model)\n\n# load image\nimg = Image.open(\"../tulip.jpg\")\n# [N, C, H, W]\nimg = data_transform(img)\n# expand batch dimension\nimg = torch.unsqueeze(img, dim=0)\n\n# forward\nout_put = model(img)\nfor feature_map in out_put:\n    # [N, C, H, W] -> [C, H, W]\n    im = np.squeeze(feature_map.detach().numpy())\n    # [C, H, W] -> [H, W, C]\n    im = np.transpose(im, [1, 2, 0])\n\n    # show top 12 feature maps\n    plt.figure()\n    for i in range(12):\n        ax = plt.subplot(3, 4, i+1)\n        # [H, W, C]\n        plt.imshow(im[:, :, i], cmap='gray')\n    plt.show()\n\n"
  },
  {
    "path": "pytorch_classification/analyze_weights_featuremap/analyze_kernel_weight.py",
    "content": "import torch\nfrom alexnet_model import AlexNet\nfrom resnet_model import resnet34\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n\n# create model\nmodel = AlexNet(num_classes=5)\n# model = resnet34(num_classes=5)\n# load model weights\nmodel_weight_path = \"./AlexNet.pth\"  # \"resNet34.pth\"\nmodel.load_state_dict(torch.load(model_weight_path))\nprint(model)\n\nweights_keys = model.state_dict().keys()\nfor key in weights_keys:\n    # remove num_batches_tracked para(in bn)\n    if \"num_batches_tracked\" in key:\n        continue\n    # [kernel_number, kernel_channel, kernel_height, kernel_width]\n    weight_t = model.state_dict()[key].numpy()\n\n    # read a kernel information\n    # k = weight_t[0, :, :, :]\n\n    # calculate mean, std, min, max\n    weight_mean = weight_t.mean()\n    weight_std = weight_t.std(ddof=1)\n    weight_min = weight_t.min()\n    weight_max = weight_t.max()\n    print(\"mean is {}, std is {}, min is {}, max is {}\".format(weight_mean,\n                                                               weight_std,\n                                                               weight_max,\n                                                               weight_min))\n\n    # plot hist image\n    plt.close()\n    weight_vec = np.reshape(weight_t, [-1])\n    plt.hist(weight_vec, bins=50)\n    plt.title(key)\n    plt.show()\n\n"
  },
  {
    "path": "pytorch_classification/analyze_weights_featuremap/resnet_model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        outputs = []\n        x = self.conv1(x)\n        outputs.append(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        outputs.append(x)\n        # x = self.layer2(x)\n        # x = self.layer3(x)\n        # x = self.layer4(x)\n        #\n        # if self.include_top:\n        #     x = self.avgpool(x)\n        #     x = torch.flatten(x, 1)\n        #     x = self.fc(x)\n\n        return outputs\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "pytorch_classification/custom_dataset/main.py",
    "content": "import os\n\nimport torch\nfrom torchvision import transforms\n\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, plot_data_loader_image\n\n# https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\nroot = \"/home/wz/my_github/data_set/flower_data/flower_photos\"  # 数据集所在根目录\n\n\ndef main():\n    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    train_data_set = MyDataSet(images_path=train_images_path,\n                               images_class=train_images_label,\n                               transform=data_transform[\"train\"])\n\n    batch_size = 8\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_data_set,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               num_workers=nw,\n                                               collate_fn=train_data_set.collate_fn)\n\n    # plot_data_loader_image(train_loader)\n\n    for 
step, data in enumerate(train_loader):\n        images, labels = data\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/custom_dataset/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n\n"
  },
  {
    "path": "pytorch_classification/custom_dataset/utils.py",
    "content": "import os\nimport json\nimport pickle\nimport random\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n 
   print(\"{} images for validation.\".format(len(val_images_path)))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n"
  },
  {
    "path": "pytorch_classification/grad_cam/README.md",
    "content": "## Grad-CAM\n- Original Impl: [https://github.com/jacobgil/pytorch-grad-cam](https://github.com/jacobgil/pytorch-grad-cam)\n- Grad-CAM简介: [https://b23.tv/1kccjmb](https://b23.tv/1kccjmb)\n- 使用Pytorch实现Grad-CAM并绘制热力图: [https://b23.tv/n1e60vN](https://b23.tv/n1e60vN)\n\n## 使用流程(替换成自己的网络)\n1. 将创建模型部分代码替换成自己创建模型的代码，并载入自己训练好的权重\n2. 根据自己网络设置合适的`target_layers`\n3. 根据自己的网络设置合适的预处理方法\n4. 将要预测的图片路径赋值给`img_path`\n5. 将感兴趣的类别id赋值给`target_category`\n\n"
  },
  {
    "path": "pytorch_classification/grad_cam/imagenet1k_classes.txt",
    "content": "tench, Tinca tinca\ngoldfish, Carassius auratus\ngreat white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\ntiger shark, Galeocerdo cuvieri\nhammerhead, hammerhead shark\nelectric ray, crampfish, numbfish, torpedo\nstingray\ncock\nhen\nostrich, Struthio camelus\nbrambling, Fringilla montifringilla\ngoldfinch, Carduelis carduelis\nhouse finch, linnet, Carpodacus mexicanus\njunco, snowbird\nindigo bunting, indigo finch, indigo bird, Passerina cyanea\nrobin, American robin, Turdus migratorius\nbulbul\njay\nmagpie\nchickadee\nwater ouzel, dipper\nkite\nbald eagle, American eagle, Haliaeetus leucocephalus\nvulture\ngreat grey owl, great gray owl, Strix nebulosa\nEuropean fire salamander, Salamandra salamandra\ncommon newt, Triturus vulgaris\neft\nspotted salamander, Ambystoma maculatum\naxolotl, mud puppy, Ambystoma mexicanum\nbullfrog, Rana catesbeiana\ntree frog, tree-frog\ntailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui\nloggerhead, loggerhead turtle, Caretta caretta\nleatherback turtle, leatherback, leathery turtle, Dermochelys coriacea\nmud turtle\nterrapin\nbox turtle, box tortoise\nbanded gecko\ncommon iguana, iguana, Iguana iguana\nAmerican chameleon, anole, Anolis carolinensis\nwhiptail, whiptail lizard\nagama\nfrilled lizard, Chlamydosaurus kingi\nalligator lizard\nGila monster, Heloderma suspectum\ngreen lizard, Lacerta viridis\nAfrican chameleon, Chamaeleo chamaeleon\nKomodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis\nAfrican crocodile, Nile crocodile, Crocodylus niloticus\nAmerican alligator, Alligator mississipiensis\ntriceratops\nthunder snake, worm snake, Carphophis amoenus\nringneck snake, ring-necked snake, ring snake\nhognose snake, puff adder, sand viper\ngreen snake, grass snake\nking snake, kingsnake\ngarter snake, grass snake\nwater snake\nvine snake\nnight snake, Hypsiglena torquata\nboa constrictor, Constrictor constrictor\nrock python, rock snake, Python 
sebae\nIndian cobra, Naja naja\ngreen mamba\nsea snake\nhorned viper, cerastes, sand viper, horned asp, Cerastes cornutus\ndiamondback, diamondback rattlesnake, Crotalus adamanteus\nsidewinder, horned rattlesnake, Crotalus cerastes\ntrilobite\nharvestman, daddy longlegs, Phalangium opilio\nscorpion\nblack and gold garden spider, Argiope aurantia\nbarn spider, Araneus cavaticus\ngarden spider, Aranea diademata\nblack widow, Latrodectus mactans\ntarantula\nwolf spider, hunting spider\ntick\ncentipede\nblack grouse\nptarmigan\nruffed grouse, partridge, Bonasa umbellus\nprairie chicken, prairie grouse, prairie fowl\npeacock\nquail\npartridge\nAfrican grey, African gray, Psittacus erithacus\nmacaw\nsulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita\nlorikeet\ncoucal\nbee eater\nhornbill\nhummingbird\njacamar\ntoucan\ndrake\nred-breasted merganser, Mergus serrator\ngoose\nblack swan, Cygnus atratus\ntusker\nechidna, spiny anteater, anteater\nplatypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus\nwallaby, brush kangaroo\nkoala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus\nwombat\njellyfish\nsea anemone, anemone\nbrain coral\nflatworm, platyhelminth\nnematode, nematode worm, roundworm\nconch\nsnail\nslug\nsea slug, nudibranch\nchiton, coat-of-mail shell, sea cradle, polyplacophore\nchambered nautilus, pearly nautilus, nautilus\nDungeness crab, Cancer magister\nrock crab, Cancer irroratus\nfiddler crab\nking crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica\nAmerican lobster, Northern lobster, Maine lobster, Homarus americanus\nspiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish\ncrayfish, crawfish, crawdad, crawdaddy\nhermit crab\nisopod\nwhite stork, Ciconia ciconia\nblack stork, Ciconia nigra\nspoonbill\nflamingo\nlittle blue heron, Egretta caerulea\nAmerican egret, great white heron, Egretta albus\nbittern\ncrane\nlimpkin, Aramus pictus\nEuropean 
gallinule, Porphyrio porphyrio\nAmerican coot, marsh hen, mud hen, water hen, Fulica americana\nbustard\nruddy turnstone, Arenaria interpres\nred-backed sandpiper, dunlin, Erolia alpina\nredshank, Tringa totanus\ndowitcher\noystercatcher, oyster catcher\npelican\nking penguin, Aptenodytes patagonica\nalbatross, mollymawk\ngrey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus\nkiller whale, killer, orca, grampus, sea wolf, Orcinus orca\ndugong, Dugong dugon\nsea lion\nChihuahua\nJapanese spaniel\nMaltese dog, Maltese terrier, Maltese\nPekinese, Pekingese, Peke\nShih-Tzu\nBlenheim spaniel\npapillon\ntoy terrier\nRhodesian ridgeback\nAfghan hound, Afghan\nbasset, basset hound\nbeagle\nbloodhound, sleuthhound\nbluetick\nblack-and-tan coonhound\nWalker hound, Walker foxhound\nEnglish foxhound\nredbone\nborzoi, Russian wolfhound\nIrish wolfhound\nItalian greyhound\nwhippet\nIbizan hound, Ibizan Podenco\nNorwegian elkhound, elkhound\notterhound, otter hound\nSaluki, gazelle hound\nScottish deerhound, deerhound\nWeimaraner\nStaffordshire bullterrier, Staffordshire bull terrier\nAmerican Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier\nBedlington terrier\nBorder terrier\nKerry blue terrier\nIrish terrier\nNorfolk terrier\nNorwich terrier\nYorkshire terrier\nwire-haired fox terrier\nLakeland terrier\nSealyham terrier, Sealyham\nAiredale, Airedale terrier\ncairn, cairn terrier\nAustralian terrier\nDandie Dinmont, Dandie Dinmont terrier\nBoston bull, Boston terrier\nminiature schnauzer\ngiant schnauzer\nstandard schnauzer\nScotch terrier, Scottish terrier, Scottie\nTibetan terrier, chrysanthemum dog\nsilky terrier, Sydney silky\nsoft-coated wheaten terrier\nWest Highland white terrier\nLhasa, Lhasa apso\nflat-coated retriever\ncurly-coated retriever\ngolden retriever\nLabrador retriever\nChesapeake Bay retriever\nGerman short-haired pointer\nvizsla, Hungarian pointer\nEnglish setter\nIrish setter, red 
setter\nGordon setter\nBrittany spaniel\nclumber, clumber spaniel\nEnglish springer, English springer spaniel\nWelsh springer spaniel\ncocker spaniel, English cocker spaniel, cocker\nSussex spaniel\nIrish water spaniel\nkuvasz\nschipperke\ngroenendael\nmalinois\nbriard\nkelpie\nkomondor\nOld English sheepdog, bobtail\nShetland sheepdog, Shetland sheep dog, Shetland\ncollie\nBorder collie\nBouvier des Flandres, Bouviers des Flandres\nRottweiler\nGerman shepherd, German shepherd dog, German police dog, alsatian\nDoberman, Doberman pinscher\nminiature pinscher\nGreater Swiss Mountain dog\nBernese mountain dog\nAppenzeller\nEntleBucher\nboxer\nbull mastiff\nTibetan mastiff\nFrench bulldog\nGreat Dane\nSaint Bernard, St Bernard\nEskimo dog, husky\nmalamute, malemute, Alaskan malamute\nSiberian husky\ndalmatian, coach dog, carriage dog\naffenpinscher, monkey pinscher, monkey dog\nbasenji\npug, pug-dog\nLeonberg\nNewfoundland, Newfoundland dog\nGreat Pyrenees\nSamoyed, Samoyede\nPomeranian\nchow, chow chow\nkeeshond\nBrabancon griffon\nPembroke, Pembroke Welsh corgi\nCardigan, Cardigan Welsh corgi\ntoy poodle\nminiature poodle\nstandard poodle\nMexican hairless\ntimber wolf, grey wolf, gray wolf, Canis lupus\nwhite wolf, Arctic wolf, Canis lupus tundrarum\nred wolf, maned wolf, Canis rufus, Canis niger\ncoyote, prairie wolf, brush wolf, Canis latrans\ndingo, warrigal, warragal, Canis dingo\ndhole, Cuon alpinus\nAfrican hunting dog, hyena dog, Cape hunting dog, Lycaon pictus\nhyena, hyaena\nred fox, Vulpes vulpes\nkit fox, Vulpes macrotis\nArctic fox, white fox, Alopex lagopus\ngrey fox, gray fox, Urocyon cinereoargenteus\ntabby, tabby cat\ntiger cat\nPersian cat\nSiamese cat, Siamese\nEgyptian cat\ncougar, puma, catamount, mountain lion, painter, panther, Felis concolor\nlynx, catamount\nleopard, Panthera pardus\nsnow leopard, ounce, Panthera uncia\njaguar, panther, Panthera onca, Felis onca\nlion, king of beasts, Panthera leo\ntiger, Panthera tigris\ncheetah, chetah, 
Acinonyx jubatus\nbrown bear, bruin, Ursus arctos\nAmerican black bear, black bear, Ursus americanus, Euarctos americanus\nice bear, polar bear, Ursus Maritimus, Thalarctos maritimus\nsloth bear, Melursus ursinus, Ursus ursinus\nmongoose\nmeerkat, mierkat\ntiger beetle\nladybug, ladybeetle, lady beetle, ladybird, ladybird beetle\nground beetle, carabid beetle\nlong-horned beetle, longicorn, longicorn beetle\nleaf beetle, chrysomelid\ndung beetle\nrhinoceros beetle\nweevil\nfly\nbee\nant, emmet, pismire\ngrasshopper, hopper\ncricket\nwalking stick, walkingstick, stick insect\ncockroach, roach\nmantis, mantid\ncicada, cicala\nleafhopper\nlacewing, lacewing fly\ndragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk\ndamselfly\nadmiral\nringlet, ringlet butterfly\nmonarch, monarch butterfly, milkweed butterfly, Danaus plexippus\ncabbage butterfly\nsulphur butterfly, sulfur butterfly\nlycaenid, lycaenid butterfly\nstarfish, sea star\nsea urchin\nsea cucumber, holothurian\nwood rabbit, cottontail, cottontail rabbit\nhare\nAngora, Angora rabbit\nhamster\nporcupine, hedgehog\nfox squirrel, eastern fox squirrel, Sciurus niger\nmarmot\nbeaver\nguinea pig, Cavia cobaya\nsorrel\nzebra\nhog, pig, grunter, squealer, Sus scrofa\nwild boar, boar, Sus scrofa\nwarthog\nhippopotamus, hippo, river horse, Hippopotamus amphibius\nox\nwater buffalo, water ox, Asiatic buffalo, Bubalus bubalis\nbison\nram, tup\nbighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis\nibex, Capra ibex\nhartebeest\nimpala, Aepyceros melampus\ngazelle\nArabian camel, dromedary, Camelus dromedarius\nllama\nweasel\nmink\npolecat, fitch, foulmart, foumart, Mustela putorius\nblack-footed ferret, ferret, Mustela nigripes\notter\nskunk, polecat, wood pussy\nbadger\narmadillo\nthree-toed sloth, ai, Bradypus tridactylus\norangutan, orang, orangutang, Pongo pygmaeus\ngorilla, Gorilla gorilla\nchimpanzee, chimp, 
Pan troglodytes\ngibbon, Hylobates lar\nsiamang, Hylobates syndactylus, Symphalangus syndactylus\nguenon, guenon monkey\npatas, hussar monkey, Erythrocebus patas\nbaboon\nmacaque\nlangur\ncolobus, colobus monkey\nproboscis monkey, Nasalis larvatus\nmarmoset\ncapuchin, ringtail, Cebus capucinus\nhowler monkey, howler\ntiti, titi monkey\nspider monkey, Ateles geoffroyi\nsquirrel monkey, Saimiri sciureus\nMadagascar cat, ring-tailed lemur, Lemur catta\nindri, indris, Indri indri, Indri brevicaudatus\nIndian elephant, Elephas maximus\nAfrican elephant, Loxodonta africana\nlesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens\ngiant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca\nbarracouta, snoek\neel\ncoho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch\nrock beauty, Holocanthus tricolor\nanemone fish\nsturgeon\ngar, garfish, garpike, billfish, Lepisosteus osseus\nlionfish\npuffer, pufferfish, blowfish, globefish\nabacus\nabaya\nacademic gown, academic robe, judge's robe\naccordion, piano accordion, squeeze box\nacoustic guitar\naircraft carrier, carrier, flattop, attack aircraft carrier\nairliner\nairship, dirigible\naltar\nambulance\namphibian, amphibious vehicle\nanalog clock\napiary, bee house\napron\nashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin\nassault rifle, assault gun\nbackpack, back pack, knapsack, packsack, rucksack, haversack\nbakery, bakeshop, bakehouse\nbalance beam, beam\nballoon\nballpoint, ballpoint pen, ballpen, Biro\nBand Aid\nbanjo\nbannister, banister, balustrade, balusters, handrail\nbarbell\nbarber chair\nbarbershop\nbarn\nbarometer\nbarrel, cask\nbarrow, garden cart, lawn cart, wheelbarrow\nbaseball\nbasketball\nbassinet\nbassoon\nbathing cap, swimming cap\nbath towel\nbathtub, bathing tub, bath, tub\nbeach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon\nbeacon, lighthouse, beacon light, pharos\nbeaker\nbearskin, 
busby, shako\nbeer bottle\nbeer glass\nbell cote, bell cot\nbib\nbicycle-built-for-two, tandem bicycle, tandem\nbikini, two-piece\nbinder, ring-binder\nbinoculars, field glasses, opera glasses\nbirdhouse\nboathouse\nbobsled, bobsleigh, bob\nbolo tie, bolo, bola tie, bola\nbonnet, poke bonnet\nbookcase\nbookshop, bookstore, bookstall\nbottlecap\nbow\nbow tie, bow-tie, bowtie\nbrass, memorial tablet, plaque\nbrassiere, bra, bandeau\nbreakwater, groin, groyne, mole, bulwark, seawall, jetty\nbreastplate, aegis, egis\nbroom\nbucket, pail\nbuckle\nbulletproof vest\nbullet train, bullet\nbutcher shop, meat market\ncab, hack, taxi, taxicab\ncaldron, cauldron\ncandle, taper, wax light\ncannon\ncanoe\ncan opener, tin opener\ncardigan\ncar mirror\ncarousel, carrousel, merry-go-round, roundabout, whirligig\ncarpenter's kit, tool kit\ncarton\ncar wheel\ncash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM\ncassette\ncassette player\ncastle\ncatamaran\nCD player\ncello, violoncello\ncellular telephone, cellular phone, cellphone, cell, mobile phone\nchain\nchainlink fence\nchain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour\nchain saw, chainsaw\nchest\nchiffonier, commode\nchime, bell, gong\nchina cabinet, china closet\nChristmas stocking\nchurch, church building\ncinema, movie theater, movie theatre, movie house, picture palace\ncleaver, meat cleaver, chopper\ncliff dwelling\ncloak\nclog, geta, patten, sabot\ncocktail shaker\ncoffee mug\ncoffeepot\ncoil, spiral, volute, whorl, helix\ncombination lock\ncomputer keyboard, keypad\nconfectionery, confectionary, candy store\ncontainer ship, containership, container vessel\nconvertible\ncorkscrew, bottle screw\ncornet, horn, trumpet, trump\ncowboy boot\ncowboy hat, ten-gallon hat\ncradle\ncrane\ncrash helmet\ncrate\ncrib, cot\nCrock Pot\ncroquet ball\ncrutch\ncuirass\ndam, dike, dyke\ndesk\ndesktop computer\ndial telephone, dial 
phone\ndiaper, nappy, napkin\ndigital clock\ndigital watch\ndining table, board\ndishrag, dishcloth\ndishwasher, dish washer, dishwashing machine\ndisk brake, disc brake\ndock, dockage, docking facility\ndogsled, dog sled, dog sleigh\ndome\ndoormat, welcome mat\ndrilling platform, offshore rig\ndrum, membranophone, tympan\ndrumstick\ndumbbell\nDutch oven\nelectric fan, blower\nelectric guitar\nelectric locomotive\nentertainment center\nenvelope\nespresso maker\nface powder\nfeather boa, boa\nfile, file cabinet, filing cabinet\nfireboat\nfire engine, fire truck\nfire screen, fireguard\nflagpole, flagstaff\nflute, transverse flute\nfolding chair\nfootball helmet\nforklift\nfountain\nfountain pen\nfour-poster\nfreight car\nFrench horn, horn\nfrying pan, frypan, skillet\nfur coat\ngarbage truck, dustcart\ngasmask, respirator, gas helmet\ngas pump, gasoline pump, petrol pump, island dispenser\ngoblet\ngo-kart\ngolf ball\ngolfcart, golf cart\ngondola\ngong, tam-tam\ngown\ngrand piano, grand\ngreenhouse, nursery, glasshouse\ngrille, radiator grille\ngrocery store, grocery, food market, market\nguillotine\nhair slide\nhair spray\nhalf track\nhammer\nhamper\nhand blower, blow dryer, blow drier, hair dryer, hair drier\nhand-held computer, hand-held microcomputer\nhandkerchief, hankie, hanky, hankey\nhard disc, hard disk, fixed disk\nharmonica, mouth organ, harp, mouth harp\nharp\nharvester, reaper\nhatchet\nholster\nhome theater, home theatre\nhoneycomb\nhook, claw\nhoopskirt, crinoline\nhorizontal bar, high bar\nhorse cart, horse-cart\nhourglass\niPod\niron, smoothing iron\njack-o'-lantern\njean, blue jean, denim\njeep, landrover\njersey, T-shirt, tee shirt\njigsaw puzzle\njinrikisha, ricksha, rickshaw\njoystick\nkimono\nknee pad\nknot\nlab coat, laboratory coat\nladle\nlampshade, lamp shade\nlaptop, laptop computer\nlawn mower, mower\nlens cap, lens cover\nletter opener, paper knife, paperknife\nlibrary\nlifeboat\nlighter, light, igniter, ignitor\nlimousine, limo\nliner, 
ocean liner\nlipstick, lip rouge\nLoafer\nlotion\nloudspeaker, speaker, speaker unit, loudspeaker system, speaker system\nloupe, jeweler's loupe\nlumbermill, sawmill\nmagnetic compass\nmailbag, postbag\nmailbox, letter box\nmaillot\nmaillot, tank suit\nmanhole cover\nmaraca\nmarimba, xylophone\nmask\nmatchstick\nmaypole\nmaze, labyrinth\nmeasuring cup\nmedicine chest, medicine cabinet\nmegalith, megalithic structure\nmicrophone, mike\nmicrowave, microwave oven\nmilitary uniform\nmilk can\nminibus\nminiskirt, mini\nminivan\nmissile\nmitten\nmixing bowl\nmobile home, manufactured home\nModel T\nmodem\nmonastery\nmonitor\nmoped\nmortar\nmortarboard\nmosque\nmosquito net\nmotor scooter, scooter\nmountain bike, all-terrain bike, off-roader\nmountain tent\nmouse, computer mouse\nmousetrap\nmoving van\nmuzzle\nnail\nneck brace\nnecklace\nnipple\nnotebook, notebook computer\nobelisk\noboe, hautboy, hautbois\nocarina, sweet potato\nodometer, hodometer, mileometer, milometer\noil filter\norgan, pipe organ\noscilloscope, scope, cathode-ray oscilloscope, CRO\noverskirt\noxcart\noxygen mask\npacket\npaddle, boat paddle\npaddlewheel, paddle wheel\npadlock\npaintbrush\npajama, pyjama, pj's, jammies\npalace\npanpipe, pandean pipe, syrinx\npaper towel\nparachute, chute\nparallel bars, bars\npark bench\nparking meter\npassenger car, coach, carriage\npatio, terrace\npay-phone, pay-station\npedestal, plinth, footstall\npencil box, pencil case\npencil sharpener\nperfume, essence\nPetri dish\nphotocopier\npick, plectrum, plectron\npickelhaube\npicket fence, paling\npickup, pickup truck\npier\npiggy bank, penny bank\npill bottle\npillow\nping-pong ball\npinwheel\npirate, pirate ship\npitcher, ewer\nplane, carpenter's plane, woodworking plane\nplanetarium\nplastic bag\nplate rack\nplow, plough\nplunger, plumber's helper\nPolaroid camera, Polaroid Land camera\npole\npolice van, police wagon, paddy wagon, patrol wagon, wagon, black Maria\nponcho\npool table, billiard table, snooker 
table\npop bottle, soda bottle\npot, flowerpot\npotter's wheel\npower drill\nprayer rug, prayer mat\nprinter\nprison, prison house\nprojectile, missile\nprojector\npuck, hockey puck\npunching bag, punch bag, punching ball, punchball\npurse\nquill, quill pen\nquilt, comforter, comfort, puff\nracer, race car, racing car\nracket, racquet\nradiator\nradio, wireless\nradio telescope, radio reflector\nrain barrel\nrecreational vehicle, RV, R.V.\nreel\nreflex camera\nrefrigerator, icebox\nremote control, remote\nrestaurant, eating house, eating place, eatery\nrevolver, six-gun, six-shooter\nrifle\nrocking chair, rocker\nrotisserie\nrubber eraser, rubber, pencil eraser\nrugby ball\nrule, ruler\nrunning shoe\nsafe\nsafety pin\nsaltshaker, salt shaker\nsandal\nsarong\nsax, saxophone\nscabbard\nscale, weighing machine\nschool bus\nschooner\nscoreboard\nscreen, CRT screen\nscrew\nscrewdriver\nseat belt, seatbelt\nsewing machine\nshield, buckler\nshoe shop, shoe-shop, shoe store\nshoji\nshopping basket\nshopping cart\nshovel\nshower cap\nshower curtain\nski\nski mask\nsleeping bag\nslide rule, slipstick\nsliding door\nslot, one-armed bandit\nsnorkel\nsnowmobile\nsnowplow, snowplough\nsoap dispenser\nsoccer ball\nsock\nsolar dish, solar collector, solar furnace\nsombrero\nsoup bowl\nspace bar\nspace heater\nspace shuttle\nspatula\nspeedboat\nspider web, spider's web\nspindle\nsports car, sport car\nspotlight, spot\nstage\nsteam locomotive\nsteel arch bridge\nsteel drum\nstethoscope\nstole\nstone wall\nstopwatch, stop watch\nstove\nstrainer\nstreetcar, tram, tramcar, trolley, trolley car\nstretcher\nstudio couch, day bed\nstupa, tope\nsubmarine, pigboat, sub, U-boat\nsuit, suit of clothes\nsundial\nsunglass\nsunglasses, dark glasses, shades\nsunscreen, sunblock, sun blocker\nsuspension bridge\nswab, swob, mop\nsweatshirt\nswimming trunks, bathing trunks\nswing\nswitch, electric switch, electrical switch\nsyringe\ntable lamp\ntank, army tank, armored combat vehicle, armoured 
combat vehicle\ntape player\nteapot\nteddy, teddy bear\ntelevision, television system\ntennis ball\nthatch, thatched roof\ntheater curtain, theatre curtain\nthimble\nthresher, thrasher, threshing machine\nthrone\ntile roof\ntoaster\ntobacco shop, tobacconist shop, tobacconist\ntoilet seat\ntorch\ntotem pole\ntow truck, tow car, wrecker\ntoyshop\ntractor\ntrailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi\ntray\ntrench coat\ntricycle, trike, velocipede\ntrimaran\ntripod\ntriumphal arch\ntrolleybus, trolley coach, trackless trolley\ntrombone\ntub, vat\nturnstile\ntypewriter keyboard\numbrella\nunicycle, monocycle\nupright, upright piano\nvacuum, vacuum cleaner\nvase\nvault\nvelvet\nvending machine\nvestment\nviaduct\nviolin, fiddle\nvolleyball\nwaffle iron\nwall clock\nwallet, billfold, notecase, pocketbook\nwardrobe, closet, press\nwarplane, military plane\nwashbasin, handbasin, washbowl, lavabo, wash-hand basin\nwasher, automatic washer, washing machine\nwater bottle\nwater jug\nwater tower\nwhiskey jug\nwhistle\nwig\nwindow screen\nwindow shade\nWindsor tie\nwine bottle\nwing\nwok\nwooden spoon\nwool, woolen, woollen\nworm fence, snake fence, snake-rail fence, Virginia fence\nwreck\nyawl\nyurt\nweb site, website, internet site, site\ncomic book\ncrossword puzzle, crossword\nstreet sign\ntraffic light, traffic signal, stoplight\nbook jacket, dust cover, dust jacket, dust wrapper\nmenu\nplate\nguacamole\nconsomme\nhot pot, hotpot\ntrifle\nice cream, icecream\nice lolly, lolly, lollipop, popsicle\nFrench loaf\nbagel, beigel\npretzel\ncheeseburger\nhotdog, hot dog, red hot\nmashed potato\nhead cabbage\nbroccoli\ncauliflower\nzucchini, courgette\nspaghetti squash\nacorn squash\nbutternut squash\ncucumber, cuke\nartichoke, globe artichoke\nbell pepper\ncardoon\nmushroom\nGranny Smith\nstrawberry\norange\nlemon\nfig\npineapple, ananas\nbanana\njackfruit, jak, jack\ncustard apple\npomegranate\nhay\ncarbonara\nchocolate sauce, chocolate 
syrup\ndough\nmeat loaf, meatloaf\npizza, pizza pie\npotpie\nburrito\nred wine\nespresso\ncup\neggnog\nalp\nbubble\ncliff, drop, drop-off\ncoral reef\ngeyser\nlakeside, lakeshore\npromontory, headland, head, foreland\nsandbar, sand bar\nseashore, coast, seacoast, sea-coast\nvalley, vale\nvolcano\nballplayer, baseball player\ngroom, bridegroom\nscuba diver\nrapeseed\ndaisy\nyellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum\ncorn\nacorn\nhip, rose hip, rosehip\nbuckeye, horse chestnut, conker\ncoral fungus\nagaric\ngyromitra\nstinkhorn, carrion fungus\nearthstar\nhen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa\nbolete\near, spike, capitulum\ntoilet tissue, toilet paper, bathroom tissue"
  },
  {
    "path": "pytorch_classification/grad_cam/imagenet21k_classes.txt",
    "content": "organism, being\nbenthos\nheterotroph\ncell\nperson, individual, someone, somebody, mortal, soul\nanimal, animate_being, beast, brute, creature, fauna\nplant, flora, plant_life\nfood, nutrient\nartifact, artefact\nhop\ncheck-in\ndressage\ncurvet, vaulting\npiaffe\nfunambulism, tightrope_walking\nrock_climbing\ncontact_sport\noutdoor_sport, field_sport\ngymnastics, gymnastic_exercise\nacrobatics, tumbling\ntrack_and_field\ntrack, running\njumping\nbroad_jump, long_jump\nhigh_jump\nFosbury_flop\nskiing\ncross-country_skiing\nski_jumping\nwater_sport, aquatics\nswimming, swim\nbathe\ndip, plunge\ndive, diving\nfloating, natation\ndead-man's_float, prone_float\nbelly_flop, belly_flopper, belly_whop, belly_whopper\ncliff_diving\nflip\ngainer, full_gainer\nhalf_gainer\njackknife\nswan_dive, swallow_dive\nskin_diving, skin-dive\nscuba_diving\nsnorkeling, snorkel_diving\nsurfing, surfboarding, surfriding\nwater-skiing\nrowing, row\nsculling\nboxing, pugilism, fisticuffs\nprofessional_boxing\nin-fighting\nfight\nrope-a-dope\nspar, sparring\narchery\nsledding\ntobogganing\nluging\nbobsledding\nwrestling, rassling, grappling\nGreco-Roman_wrestling\nprofessional_wrestling\nsumo\nskating\nice_skating\nfigure_skating\nrollerblading\nroller_skating\nskateboarding\nspeed_skating\nracing\nauto_racing, car_racing\nboat_racing\nhydroplane_racing\ncamel_racing\ngreyhound_racing\nhorse_racing\nriding, horseback_riding, equitation\nequestrian_sport\npony-trekking\nshowjumping, stadium_jumping\ncross-country_riding, cross-country_jumping\ncycling\nbicycling\nmotorcycling\ndune_cycling\nblood_sport\nbullfighting, tauromachy\ncockfighting\nhunt, hunting\nbattue\nbeagling\ncoursing\ndeer_hunting, deer_hunt\nducking, duck_hunting\nfox_hunting, foxhunt\npigsticking\nfishing, sportfishing\nangling\nfly-fishing\ntroll, trolling\ncasting, cast\nbait_casting\nfly_casting\novercast\nsurf_casting, surf_fishing\nday_game\nathletic_game\nice_hockey, hockey, 
hockey_game\ntetherball\nwater_polo\noutdoor_game\ngolf, golf_game\nprofessional_golf\nround_of_golf, round\nmedal_play, stroke_play\nmatch_play\nminiature_golf\ncroquet\nquoits, horseshoes\nshuffleboard, shovelboard\nfield_game\nfield_hockey, hockey\nshinny, shinney\nfootball, football_game\nAmerican_football, American_football_game\nprofessional_football\ntouch_football\nhurling\nrugby, rugby_football, rugger\nball_game, ballgame\nbaseball, baseball_game\nball\nprofessional_baseball\nhardball\nperfect_game\nno-hit_game, no-hitter\none-hitter, 1-hitter\ntwo-hitter, 2-hitter\nthree-hitter, 3-hitter\nfour-hitter, 4-hitter\nfive-hitter, 5-hitter\nsoftball, softball_game\nrounders\nstickball, stickball_game\ncricket\nlacrosse\npolo\npushball\nsoccer, association_football\ncourt_game\nhandball\nracquetball\nfives\nsquash, squash_racquets, squash_rackets\nvolleyball, volleyball_game\njai_alai, pelota\nbadminton\nbattledore, battledore_and_shuttlecock\nbasketball, basketball_game, hoops\nprofessional_basketball\ndeck_tennis\nnetball\ntennis, lawn_tennis\nprofessional_tennis\nsingles\nsingles\ndoubles\ndoubles\nroyal_tennis, real_tennis, court_tennis\npallone\nsport, athletics\nclasp, clench, clutch, clutches, grasp, grip, hold\njudo\nteam_sport\nLast_Supper, Lord's_Supper\nSeder, Passover_supper\ncamping, encampment, bivouacking, tenting\npest\ncritter\ncreepy-crawly\ndarter\npeeper\nhomeotherm, homoiotherm, homotherm\npoikilotherm, ectotherm\nrange_animal\nscavenger\nbottom-feeder, bottom-dweller\nbottom-feeder\nwork_animal\nbeast_of_burden, jument\ndraft_animal\npack_animal, sumpter\ndomestic_animal, domesticated_animal\nfeeder\nfeeder\nstocker\nhatchling\nhead\nmigrator\nmolter, moulter\npet\nstayer\nstunt\nmarine_animal, marine_creature, sea_animal, sea_creature\nby-catch, bycatch\nfemale\nhen\nmale\nadult\nyoung, offspring\norphan\nyoung_mammal\nbaby\npup, whelp\nwolf_pup, wolf_cub\npuppy\ncub, 
young_carnivore\nlion_cub\nbear_cub\ntiger_cub\nkit\nsuckling\nsire\ndam\nthoroughbred, purebred, pureblood\ngiant\nmutant\ncarnivore\nherbivore\ninsectivore\nacrodont\npleurodont\nmicroorganism, micro-organism\nmonohybrid\narbovirus, arborvirus\nadenovirus\narenavirus\nMarburg_virus\nArenaviridae\nvesiculovirus\nReoviridae\nvariola_major, variola_major_virus\nviroid, virusoid\ncoliphage\nparamyxovirus\npoliovirus\nherpes, herpes_virus\nherpes_simplex_1, HS1, HSV-1, HSV-I\nherpes_zoster, herpes_zoster_virus\nherpes_varicella_zoster, herpes_varicella_zoster_virus\ncytomegalovirus, CMV\nvaricella_zoster_virus\npolyoma, polyoma_virus\nlyssavirus\nreovirus\nrotavirus\nmoneran, moneron\narchaebacteria, archaebacterium, archaeobacteria, archeobacteria\nbacteroid\nBacillus_anthracis, anthrax_bacillus\nYersinia_pestis\nBrucella\nspirillum, spirilla\nbotulinus, botulinum, Clostridium_botulinum\nclostridium_perfringens\ncyanobacteria, blue-green_algae\ntrichodesmium\nnitric_bacteria, nitrobacteria\nspirillum\nFrancisella, genus_Francisella\ngonococcus, Neisseria_gonorrhoeae\nCorynebacterium_diphtheriae, C._diphtheriae, Klebs-Loeffler_bacillus\nenteric_bacteria, enterobacteria, enterics, entric\nklebsiella\nSalmonella_typhimurium\ntyphoid_bacillus, Salmonella_typhosa, Salmonella_typhi\nnitrate_bacterium, nitric_bacterium\nnitrite_bacterium, nitrous_bacterium\nactinomycete\nstreptomyces\nStreptomyces_erythreus\nStreptomyces_griseus\ntubercle_bacillus, Mycobacterium_tuberculosis\npus-forming_bacteria\nstreptobacillus\nmyxobacteria, myxobacterium, myxobacter, gliding_bacteria, slime_bacteria\nstaphylococcus, staphylococci, staph\ndiplococcus\npneumococcus, Diplococcus_pneumoniae\nstreptococcus, streptococci, strep\nspirochete, spirochaete\nplanktonic_algae\nzooplankton\nparasite\nendoparasite, entoparasite, entozoan, entozoon, endozoan\nectoparasite, ectozoan, ectozoon, epizoan, epizoon\npathogen\ncommensal\nmyrmecophile\nprotoctist\nprotozoan, protozoon\nsarcodinian, 
sarcodine\nheliozoan\nendameba\nameba, amoeba\nglobigerina\ntestacean\narcella\ndifflugia\nciliate, ciliated_protozoan, ciliophoran\nparamecium, paramecia\nstentor\nalga, algae\narame\nseagrass\ngolden_algae\nyellow-green_algae\nbrown_algae\nkelp\nfucoid, fucoid_algae\nfucoid\nfucus\nbladderwrack, Ascophyllum_nodosum\ngreen_algae, chlorophyte\npond_scum\nchlorella\nstonewort\ndesmid\nsea_moss\neukaryote, eucaryote\nprokaryote, procaryote\nzooid\nLeishmania, genus_Leishmania\nzoomastigote, zooflagellate\npolymastigote\ncostia, Costia_necatrix\ngiardia\ncryptomonad, cryptophyte\nsporozoan\nsporozoite\ntrophozoite\nmerozoite\ncoccidium, eimeria\ngregarine\nplasmodium, Plasmodium_vivax, malaria_parasite\nleucocytozoan, leucocytozoon\nmicrosporidian\nOstariophysi, order_Ostariophysi\ncypriniform_fish\nloach\ncyprinid, cyprinid_fish\ncarp\ndomestic_carp, Cyprinus_carpio\nleather_carp\nmirror_carp\nEuropean_bream, Abramis_brama\ntench, Tinca_tinca\ndace, Leuciscus_leuciscus\nchub, Leuciscus_cephalus\nshiner\ncommon_shiner, silversides, Notropis_cornutus\nroach, Rutilus_rutilus\nrudd, Scardinius_erythrophthalmus\nminnow, Phoxinus_phoxinus\ngudgeon, Gobio_gobio\ngoldfish, Carassius_auratus\ncrucian_carp, Carassius_carassius, Carassius_vulgaris\nelectric_eel, Electrophorus_electric\ncatostomid\nbuffalo_fish, buffalofish\nblack_buffalo, Ictiobus_niger\nhog_sucker, hog_molly, Hypentelium_nigricans\nredhorse, redhorse_sucker\ncyprinodont\nkillifish\nmummichog, Fundulus_heteroclitus\nstriped_killifish, mayfish, may_fish, Fundulus_majalis\nrivulus\nflagfish, American_flagfish, Jordanella_floridae\nswordtail, helleri, topminnow, Xyphophorus_helleri\nguppy, rainbow_fish, Lebistes_reticulatus\ntopminnow, poeciliid_fish, poeciliid, live-bearer\nmosquitofish, Gambusia_affinis\nplaty, Platypoecilus_maculatus\nmollie, molly\nsquirrelfish\nreef_squirrelfish, Holocentrus_coruscus\ndeepwater_squirrelfish, Holocentrus_bullisi\nHolocentrus_ascensionis\nsoldierfish, soldier-fish\nanomalops, 
flashlight_fish\nflashlight_fish, Photoblepharon_palpebratus\nJohn_Dory, Zeus_faber\nboarfish, Capros_aper\nboarfish\ncornetfish\nstickleback, prickleback\nthree-spined_stickleback, Gasterosteus_aculeatus\nten-spined_stickleback, Gasterosteus_pungitius\npipefish, needlefish\ndwarf_pipefish, Syngnathus_hildebrandi\ndeepwater_pipefish, Cosmocampus_profundus\nseahorse, sea_horse\nsnipefish, bellows_fish\nshrimpfish, shrimp-fish\ntrumpetfish, Aulostomus_maculatus\npellicle\nembryo, conceptus, fertilized_egg\nfetus, foetus\nabortus\nspawn\nblastula, blastosphere\nblastocyst, blastodermic_vessicle\ngastrula\nmorula\nyolk, vitellus\nchordate\ncephalochordate\nlancelet, amphioxus\ntunicate, urochordate, urochord\nascidian\nsea_squirt\nsalp, salpa\ndoliolum\nlarvacean\nappendicularia\nascidian_tadpole\nvertebrate, craniate\nAmniota\namniote\naquatic_vertebrate\njawless_vertebrate, jawless_fish, agnathan\nostracoderm\nheterostracan\nanaspid\nconodont\ncyclostome\nlamprey, lamprey_eel, lamper_eel\nsea_lamprey, Petromyzon_marinus\nhagfish, hag, slime_eels\nMyxine_glutinosa\neptatretus\ngnathostome\nplacoderm\ncartilaginous_fish, chondrichthian\nholocephalan, holocephalian\nchimaera\nrabbitfish, Chimaera_monstrosa\nelasmobranch, selachian\nshark\ncow_shark, six-gilled_shark, Hexanchus_griseus\nmackerel_shark\nporbeagle, Lamna_nasus\nmako, mako_shark\nshortfin_mako, Isurus_oxyrhincus\nlongfin_mako, Isurus_paucus\nbonito_shark, blue_pointed, Isurus_glaucus\ngreat_white_shark, white_shark, man-eater, man-eating_shark, Carcharodon_carcharias\nbasking_shark, Cetorhinus_maximus\nthresher, thrasher, thresher_shark, fox_shark, Alopius_vulpinus\ncarpet_shark, Orectolobus_barbatus\nnurse_shark, Ginglymostoma_cirratum\nsand_tiger, sand_shark, Carcharias_taurus, Odontaspis_taurus\nwhale_shark, Rhincodon_typus\nrequiem_shark\nbull_shark, cub_shark, Carcharhinus_leucas\nsandbar_shark, Carcharhinus_plumbeus\nblacktip_shark, sandbar_shark, Carcharhinus_limbatus\nwhitetip_shark, 
oceanic_whitetip_shark, white-tipped_shark, Carcharinus_longimanus\ndusky_shark, Carcharhinus_obscurus\nlemon_shark, Negaprion_brevirostris\nblue_shark, great_blue_shark, Prionace_glauca\ntiger_shark, Galeocerdo_cuvieri\nsoupfin_shark, soupfin, soup-fin, Galeorhinus_zyopterus\ndogfish\nsmooth_dogfish\nsmoothhound, smoothhound_shark, Mustelus_mustelus\nAmerican_smooth_dogfish, Mustelus_canis\nFlorida_smoothhound, Mustelus_norrisi\nwhitetip_shark, reef_whitetip_shark, Triaenodon_obseus\nspiny_dogfish\nAtlantic_spiny_dogfish, Squalus_acanthias\nPacific_spiny_dogfish, Squalus_suckleyi\nhammerhead, hammerhead_shark\nsmooth_hammerhead, Sphyrna_zygaena\nsmalleye_hammerhead, Sphyrna_tudes\nshovelhead, bonnethead, bonnet_shark, Sphyrna_tiburo\nangel_shark, angelfish, Squatina_squatina, monkfish\nray\nelectric_ray, crampfish, numbfish, torpedo\nsawfish\nsmalltooth_sawfish, Pristis_pectinatus\nguitarfish\nstingray\nroughtail_stingray, Dasyatis_centroura\nbutterfly_ray\neagle_ray\nspotted_eagle_ray, spotted_ray, Aetobatus_narinari\ncownose_ray, cow-nosed_ray, Rhinoptera_bonasus\nmanta, manta_ray, devilfish\nAtlantic_manta, Manta_birostris\ndevil_ray, Mobula_hypostoma\nskate\ngrey_skate, gray_skate, Raja_batis\nlittle_skate, Raja_erinacea\nthorny_skate, Raja_radiata\nbarndoor_skate, Raja_laevis\nbird\ndickeybird, dickey-bird, dickybird, dicky-bird\nfledgling, fledgeling\nnestling, baby_bird\ncock\ngamecock, fighting_cock\nhen\nnester\nnight_bird\nnight_raven\nbird_of_passage\narchaeopteryx, archeopteryx, Archaeopteryx_lithographica\narchaeornis\nratite, ratite_bird, flightless_bird\ncarinate, carinate_bird, flying_bird\nostrich, Struthio_camelus\ncassowary\nemu, Dromaius_novaehollandiae, Emu_novaehollandiae\nkiwi, apteryx\nrhea, Rhea_americana\nrhea, nandu, Pterocnemia_pennata\nelephant_bird, aepyornis\nmoa\npasserine, passeriform_bird\nnonpasserine_bird\noscine, oscine_bird\nsongbird, songster\nhoney_eater, honeysucker\naccentor\nhedge_sparrow, sparrow, dunnock, 
Prunella_modularis\nlark\nskylark, Alauda_arvensis\nwagtail\npipit, titlark, lark\nmeadow_pipit, Anthus_pratensis\nfinch\nchaffinch, Fringilla_coelebs\nbrambling, Fringilla_montifringilla\ngoldfinch, Carduelis_carduelis\nlinnet, lintwhite, Carduelis_cannabina\nsiskin, Carduelis_spinus\nred_siskin, Carduelis_cucullata\nredpoll, Carduelis_flammea\nredpoll, Carduelis_hornemanni\nNew_World_goldfinch, goldfinch, yellowbird, Spinus_tristis\npine_siskin, pine_finch, Spinus_pinus\nhouse_finch, linnet, Carpodacus_mexicanus\npurple_finch, Carpodacus_purpureus\ncanary, canary_bird\ncommon_canary, Serinus_canaria\nserin\ncrossbill, Loxia_curvirostra\nbullfinch, Pyrrhula_pyrrhula\njunco, snowbird\ndark-eyed_junco, slate-colored_junco, Junco_hyemalis\nNew_World_sparrow\nvesper_sparrow, grass_finch, Pooecetes_gramineus\nwhite-throated_sparrow, whitethroat, Zonotrichia_albicollis\nwhite-crowned_sparrow, Zonotrichia_leucophrys\nchipping_sparrow, Spizella_passerina\nfield_sparrow, Spizella_pusilla\ntree_sparrow, Spizella_arborea\nsong_sparrow, Melospiza_melodia\nswamp_sparrow, Melospiza_georgiana\nbunting\nindigo_bunting, indigo_finch, indigo_bird, Passerina_cyanea\nortolan, ortolan_bunting, Emberiza_hortulana\nreed_bunting, Emberiza_schoeniclus\nyellowhammer, yellow_bunting, Emberiza_citrinella\nyellow-breasted_bunting, Emberiza_aureola\nsnow_bunting, snowbird, snowflake, Plectrophenax_nivalis\nhoneycreeper\nbanana_quit\nsparrow, true_sparrow\nEnglish_sparrow, house_sparrow, Passer_domesticus\ntree_sparrow, Passer_montanus\ngrosbeak, grossbeak\nevening_grosbeak, Hesperiphona_vespertina\nhawfinch, Coccothraustes_coccothraustes\npine_grosbeak, Pinicola_enucleator\ncardinal, cardinal_grosbeak, Richmondena_Cardinalis, Cardinalis_cardinalis, redbird\npyrrhuloxia, Pyrrhuloxia_sinuata\ntowhee\nchewink, cheewink, Pipilo_erythrophthalmus\ngreen-tailed_towhee, Chlorura_chlorura\nweaver, weaverbird, weaver_finch\nbaya, Ploceus_philippinus\nwhydah, whidah, widow_bird\nJava_sparrow, Java_finch, 
ricebird, Padda_oryzivora\navadavat, amadavat\ngrassfinch, grass_finch\nzebra_finch, Poephila_castanotis\nhoneycreeper, Hawaiian_honeycreeper\nlyrebird\nscrubbird, scrub-bird, scrub_bird\nbroadbill\ntyrannid\nNew_World_flycatcher, flycatcher, tyrant_flycatcher, tyrant_bird\nkingbird, Tyrannus_tyrannus\nArkansas_kingbird, western_kingbird\nCassin's_kingbird, Tyrannus_vociferans\neastern_kingbird\ngrey_kingbird, gray_kingbird, petchary, Tyrannus_domenicensis_domenicensis\npewee, peewee, peewit, pewit, wood_pewee, Contopus_virens\nwestern_wood_pewee, Contopus_sordidulus\nphoebe, phoebe_bird, Sayornis_phoebe\nvermillion_flycatcher, firebird, Pyrocephalus_rubinus_mexicanus\ncotinga, chatterer\ncock_of_the_rock, Rupicola_rupicola\ncock_of_the_rock, Rupicola_peruviana\nmanakin\nbellbird\numbrella_bird, Cephalopterus_ornatus\novenbird\nantbird, ant_bird\nant_thrush\nant_shrike\nspotted_antbird, Hylophylax_naevioides\nwoodhewer, woodcreeper, wood-creeper, tree_creeper\npitta\nscissortail, scissortailed_flycatcher, Muscivora-forficata\nOld_World_flycatcher, true_flycatcher, flycatcher\nspotted_flycatcher, Muscicapa_striata, Muscicapa_grisola\nthickhead, whistler\nthrush\nmissel_thrush, mistle_thrush, mistletoe_thrush, Turdus_viscivorus\nsong_thrush, mavis, throstle, Turdus_philomelos\nfieldfare, snowbird, Turdus_pilaris\nredwing, Turdus_iliacus\nblackbird, merl, merle, ouzel, ousel, European_blackbird, Turdus_merula\nring_ouzel, ring_blackbird, ring_thrush, Turdus_torquatus\nrobin, American_robin, Turdus_migratorius\nclay-colored_robin, Turdus_greyi\nhermit_thrush, Hylocichla_guttata\nveery, Wilson's_thrush, Hylocichla_fuscescens\nwood_thrush, Hylocichla_mustelina\nnightingale, Luscinia_megarhynchos\nthrush_nightingale, Luscinia_luscinia\nbulbul\nOld_World_chat, chat\nstonechat, Saxicola_torquata\nwhinchat, Saxicola_rubetra\nsolitaire\nredstart, redtail\nwheatear\nbluebird\nrobin, redbreast, robin_redbreast, Old_World_robin, Erithacus_rubecola\nbluethroat, 
Erithacus_svecicus\nwarbler\ngnatcatcher\nkinglet\ngoldcrest, golden-crested_kinglet, Regulus_regulus\ngold-crowned_kinglet, Regulus_satrata\nruby-crowned_kinglet, ruby-crowned_wren, Regulus_calendula\nOld_World_warbler, true_warbler\nblackcap, Silvia_atricapilla\ngreater_whitethroat, whitethroat, Sylvia_communis\nlesser_whitethroat, whitethroat, Sylvia_curruca\nwood_warbler, Phylloscopus_sibilatrix\nsedge_warbler, sedge_bird, sedge_wren, reedbird, Acrocephalus_schoenobaenus\nwren_warbler\ntailorbird, Orthotomus_sutorius\nbabbler, cackler\nNew_World_warbler, wood_warbler\nparula_warbler, northern_parula, Parula_americana\nWilson's_warbler, Wilson's_blackcap, Wilsonia_pusilla\nflycatching_warbler\nAmerican_redstart, redstart, Setophaga_ruticilla\nCape_May_warbler, Dendroica_tigrina\nyellow_warbler, golden_warbler, yellowbird, Dendroica_petechia\nBlackburn, Blackburnian_warbler, Dendroica_fusca\nAudubon's_warbler, Audubon_warbler, Dendroica_auduboni\nmyrtle_warbler, myrtle_bird, Dendroica_coronata\nblackpoll, Dendroica_striate\nNew_World_chat, chat\nyellow-breasted_chat, Icteria_virens\novenbird, Seiurus_aurocapillus\nwater_thrush\nyellowthroat\ncommon_yellowthroat, Maryland_yellowthroat, Geothlypis_trichas\nriflebird, Ptloris_paradisea\nNew_World_oriole, American_oriole, oriole\nnorthern_oriole, Icterus_galbula\nBaltimore_oriole, Baltimore_bird, hangbird, firebird, Icterus_galbula_galbula\nBullock's_oriole, Icterus_galbula_bullockii\norchard_oriole, Icterus_spurius\nmeadowlark, lark\neastern_meadowlark, Sturnella_magna\nwestern_meadowlark, Sturnella_neglecta\ncacique, cazique\nbobolink, ricebird, reedbird, Dolichonyx_oryzivorus\nNew_World_blackbird, blackbird\ngrackle, crow_blackbird\npurple_grackle, Quiscalus_quiscula\nrusty_blackbird, rusty_grackle, Euphagus_carilonus\ncowbird\nred-winged_blackbird, redwing, Agelaius_phoeniceus\nOld_World_oriole, oriole\ngolden_oriole, Oriolus_oriolus\nfig-bird\nstarling\ncommon_starling, Sturnus_vulgaris\nrose-colored_starling, 
rose-colored_pastor, Pastor_sturnus, Pastor_roseus\nmyna, mynah, mina, minah, myna_bird, mynah_bird\ncrested_myna, Acridotheres_tristis\nhill_myna, Indian_grackle, grackle, Gracula_religiosa\ncorvine_bird\ncrow\nAmerican_crow, Corvus_brachyrhyncos\nraven, Corvus_corax\nrook, Corvus_frugilegus\njackdaw, daw, Corvus_monedula\nchough\njay\nOld_World_jay\ncommon_European_jay, Garullus_garullus\nNew_World_jay\nblue_jay, jaybird, Cyanocitta_cristata\nCanada_jay, grey_jay, gray_jay, camp_robber, whisker_jack, Perisoreus_canadensis\nRocky_Mountain_jay, Perisoreus_canadensis_capitalis\nnutcracker\ncommon_nutcracker, Nucifraga_caryocatactes\nClark's_nutcracker, Nucifraga_columbiana\nmagpie\nEuropean_magpie, Pica_pica\nAmerican_magpie, Pica_pica_hudsonia\nAustralian_magpie\nbutcherbird\ncurrawong, bell_magpie\npiping_crow, piping_crow-shrike, Gymnorhina_tibicen\nwren, jenny_wren\nwinter_wren, Troglodytes_troglodytes\nhouse_wren, Troglodytes_aedon\nmarsh_wren\nlong-billed_marsh_wren, Cistothorus_palustris\nsedge_wren, short-billed_marsh_wren, Cistothorus_platensis\nrock_wren, Salpinctes_obsoletus\nCarolina_wren, Thryothorus_ludovicianus\ncactus_wren\nmockingbird, mocker, Mimus_polyglotktos\nblue_mockingbird, Melanotis_caerulescens\ncatbird, grey_catbird, gray_catbird, Dumetella_carolinensis\nthrasher, mocking_thrush\nbrown_thrasher, brown_thrush, Toxostoma_rufums\nNew_Zealand_wren\nrock_wren, Xenicus_gilviventris\nrifleman_bird, Acanthisitta_chloris\ncreeper, tree_creeper\nbrown_creeper, American_creeper, Certhia_americana\nEuropean_creeper, Certhia_familiaris\nwall_creeper, tichodrome, Tichodroma_muriaria\nEuropean_nuthatch, Sitta_europaea\nred-breasted_nuthatch, Sitta_canadensis\nwhite-breasted_nuthatch, Sitta_carolinensis\ntitmouse, tit\nchickadee\nblack-capped_chickadee, blackcap, Parus_atricapillus\ntufted_titmouse, Parus_bicolor\nCarolina_chickadee, Parus_carolinensis\nblue_tit, tomtit, Parus_caeruleus\nbushtit, bush_tit\nwren-tit, Chamaea_fasciata\nverdin, 
Auriparus_flaviceps\nfairy_bluebird, bluebird\nswallow\nbarn_swallow, chimney_swallow, Hirundo_rustica\ncliff_swallow, Hirundo_pyrrhonota\ntree_swallow, tree_martin, Hirundo_nigricans\nwhite-bellied_swallow, tree_swallow, Iridoprocne_bicolor\nmartin\nhouse_martin, Delichon_urbica\nbank_martin, bank_swallow, sand_martin, Riparia_riparia\npurple_martin, Progne_subis\nwood_swallow, swallow_shrike\ntanager\nscarlet_tanager, Piranga_olivacea, redbird, firebird\nwestern_tanager, Piranga_ludoviciana\nsummer_tanager, summer_redbird, Piranga_rubra\nhepatic_tanager, Piranga_flava_hepatica\nshrike\nbutcherbird\nEuropean_shrike, Lanius_excubitor\nnorthern_shrike, Lanius_borealis\nwhite-rumped_shrike, Lanius_ludovicianus_excubitorides\nloggerhead_shrike, Lanius_lucovicianus\nmigrant_shrike, Lanius_ludovicianus_migrans\nbush_shrike\nblack-fronted_bush_shrike, Chlorophoneus_nigrifrons\nbowerbird, catbird\nsatin_bowerbird, satin_bird, Ptilonorhynchus_violaceus\ngreat_bowerbird, Chlamydera_nuchalis\nwater_ouzel, dipper\nEuropean_water_ouzel, Cinclus_aquaticus\nAmerican_water_ouzel, Cinclus_mexicanus\nvireo\nred-eyed_vireo, Vireo_olivaceous\nsolitary_vireo, Vireo_solitarius\nblue-headed_vireo, Vireo_solitarius_solitarius\nwaxwing\ncedar_waxwing, cedarbird, Bombycilla_cedrorun\nBohemian_waxwing, Bombycilla_garrulus\nbird_of_prey, raptor, raptorial_bird\nAccipitriformes, order_Accipitriformes\nhawk\neyas\ntiercel, tercel, tercelet\ngoshawk, Accipiter_gentilis\nsparrow_hawk, Accipiter_nisus\nCooper's_hawk, blue_darter, Accipiter_cooperii\nchicken_hawk, hen_hawk\nbuteonine\nredtail, red-tailed_hawk, Buteo_jamaicensis\nrough-legged_hawk, roughleg, Buteo_lagopus\nred-shouldered_hawk, Buteo_lineatus\nbuzzard, Buteo_buteo\nhoney_buzzard, Pernis_apivorus\nkite\nblack_kite, Milvus_migrans\nswallow-tailed_kite, swallow-tailed_hawk, Elanoides_forficatus\nwhite-tailed_kite, Elanus_leucurus\nharrier\nmarsh_harrier, Circus_Aeruginosus\nMontagu's_harrier, Circus_pygargus\nmarsh_hawk, 
northern_harrier, hen_harrier, Circus_cyaneus\nharrier_eagle, short-toed_eagle\nfalcon\nperegrine, peregrine_falcon, Falco_peregrinus\nfalcon-gentle, falcon-gentil\ngyrfalcon, gerfalcon, Falco_rusticolus\nkestrel, Falco_tinnunculus\nsparrow_hawk, American_kestrel, kestrel, Falco_sparverius\npigeon_hawk, merlin, Falco_columbarius\nhobby, Falco_subbuteo\ncaracara\nAudubon's_caracara, Polyborus_cheriway_audubonii\ncarancha, Polyborus_plancus\neagle, bird_of_Jove\nyoung_bird\neaglet\nharpy, harpy_eagle, Harpia_harpyja\ngolden_eagle, Aquila_chrysaetos\ntawny_eagle, Aquila_rapax\nbald_eagle, American_eagle, Haliaeetus_leucocephalus\nsea_eagle\nKamchatkan_sea_eagle, Stellar's_sea_eagle, Haliaeetus_pelagicus\nern, erne, grey_sea_eagle, gray_sea_eagle, European_sea_eagle, white-tailed_sea_eagle, Haliatus_albicilla\nfishing_eagle, Haliaeetus_leucorhyphus\nosprey, fish_hawk, fish_eagle, sea_eagle, Pandion_haliaetus\nvulture\nAegypiidae, family_Aegypiidae\nOld_World_vulture\ngriffon_vulture, griffon, Gyps_fulvus\nbearded_vulture, lammergeier, lammergeyer, Gypaetus_barbatus\nEgyptian_vulture, Pharaoh's_chicken, Neophron_percnopterus\nblack_vulture, Aegypius_monachus\nsecretary_bird, Sagittarius_serpentarius\nNew_World_vulture, cathartid\nbuzzard, turkey_buzzard, turkey_vulture, Cathartes_aura\ncondor\nAndean_condor, Vultur_gryphus\nCalifornia_condor, Gymnogyps_californianus\nblack_vulture, carrion_crow, Coragyps_atratus\nking_vulture, Sarcorhamphus_papa\nowl, bird_of_Minerva, bird_of_night, hooter\nowlet\nlittle_owl, Athene_noctua\nhorned_owl\ngreat_horned_owl, Bubo_virginianus\ngreat_grey_owl, great_gray_owl, Strix_nebulosa\ntawny_owl, Strix_aluco\nbarred_owl, Strix_varia\nscreech_owl, Otus_asio\nscreech_owl\nscops_owl\nspotted_owl, Strix_occidentalis\nOld_World_scops_owl, Otus_scops\nOriental_scops_owl, Otus_sunia\nhoot_owl\nhawk_owl, Surnia_ulula\nlong-eared_owl, Asio_otus\nlaughing_owl, laughing_jackass, Sceloglaux_albifacies\nbarn_owl, 
Tyto_alba\namphibian\nIchyostega\nurodele, caudate\nsalamander\nEuropean_fire_salamander, Salamandra_salamandra\nspotted_salamander, fire_salamander, Salamandra_maculosa\nalpine_salamander, Salamandra_atra\nnewt, triton\ncommon_newt, Triturus_vulgaris\nred_eft, Notophthalmus_viridescens\nPacific_newt\nrough-skinned_newt, Taricha_granulosa\nCalifornia_newt, Taricha_torosa\neft\nambystomid, ambystomid_salamander\nmole_salamander, Ambystoma_talpoideum\nspotted_salamander, Ambystoma_maculatum\ntiger_salamander, Ambystoma_tigrinum\naxolotl, mud_puppy, Ambystoma_mexicanum\nwaterdog\nhellbender, mud_puppy, Cryptobranchus_alleganiensis\ngiant_salamander, Megalobatrachus_maximus\nolm, Proteus_anguinus\nmud_puppy, Necturus_maculosus\ndicamptodon, dicamptodontid\nPacific_giant_salamander, Dicamptodon_ensatus\nolympic_salamander, Rhyacotriton_olympicus\nlungless_salamander, plethodont\neastern_red-backed_salamander, Plethodon_cinereus\nwestern_red-backed_salamander, Plethodon_vehiculum\ndusky_salamander\nclimbing_salamander\narboreal_salamander, Aneides_lugubris\nslender_salamander, worm_salamander\nweb-toed_salamander\nShasta_salamander, Hydromantes_shastae\nlimestone_salamander, Hydromantes_brunus\namphiuma, congo_snake, congo_eel, blind_eel\nsiren\nfrog, toad, toad_frog, anuran, batrachian, salientian\ntrue_frog, ranid\nwood-frog, wood_frog, Rana_sylvatica\nleopard_frog, spring_frog, Rana_pipiens\nbullfrog, Rana_catesbeiana\ngreen_frog, spring_frog, Rana_clamitans\ncascades_frog, Rana_cascadae\ngoliath_frog, Rana_goliath\npickerel_frog, Rana_palustris\ntarahumara_frog, Rana_tarahumarae\ngrass_frog, Rana_temporaria\nleptodactylid_frog, leptodactylid\nrobber_frog\nbarking_frog, robber_frog, Hylactophryne_augusti\ncrapaud, South_American_bullfrog, Leptodactylus_pentadactylus\ntree_frog, tree-frog\ntailed_frog, bell_toad, ribbed_toad, tailed_toad, Ascaphus_trui\nLiopelma_hamiltoni\ntrue_toad\nbufo\nagua, agua_toad, Bufo_marinus\nEuropean_toad, Bufo_bufo\nnatterjack, 
Bufo_calamita\nAmerican_toad, Bufo_americanus\nEurasian_green_toad, Bufo_viridis\nAmerican_green_toad, Bufo_debilis\nYosemite_toad, Bufo_canorus\nTexas_toad, Bufo_speciosus\nsouthwestern_toad, Bufo_microscaphus\nwestern_toad, Bufo_boreas\nobstetrical_toad, midwife_toad, Alytes_obstetricans\nmidwife_toad, Alytes_cisternasi\nfire-bellied_toad, Bombina_bombina\nspadefoot, spadefoot_toad\nwestern_spadefoot, Scaphiopus_hammondii\nsouthern_spadefoot, Scaphiopus_multiplicatus\nplains_spadefoot, Scaphiopus_bombifrons\ntree_toad, tree_frog, tree-frog\nspring_peeper, Hyla_crucifer\nPacific_tree_toad, Hyla_regilla\ncanyon_treefrog, Hyla_arenicolor\nchameleon_tree_frog\ncricket_frog\nnorthern_cricket_frog, Acris_crepitans\neastern_cricket_frog, Acris_gryllus\nchorus_frog\nlowland_burrowing_treefrog, northern_casque-headed_frog, Pternohyla_fodiens\nwestern_narrow-mouthed_toad, Gastrophryne_olivacea\neastern_narrow-mouthed_toad, Gastrophryne_carolinensis\nsheep_frog\ntongueless_frog\nSurinam_toad, Pipa_pipa, Pipa_americana\nAfrican_clawed_frog, Xenopus_laevis\nSouth_American_poison_toad\ncaecilian, blindworm\nreptile, reptilian\nanapsid, anapsid_reptile\ndiapsid, diapsid_reptile\nDiapsida, subclass_Diapsida\nchelonian, chelonian_reptile\nturtle\nsea_turtle, marine_turtle\ngreen_turtle, Chelonia_mydas\nloggerhead, loggerhead_turtle, Caretta_caretta\nridley\nAtlantic_ridley, bastard_ridley, bastard_turtle, Lepidochelys_kempii\nPacific_ridley, olive_ridley, Lepidochelys_olivacea\nhawksbill_turtle, hawksbill, hawkbill, tortoiseshell_turtle, Eretmochelys_imbricata\nleatherback_turtle, leatherback, leathery_turtle, Dermochelys_coriacea\nsnapping_turtle\ncommon_snapping_turtle, snapper, Chelydra_serpentina\nalligator_snapping_turtle, alligator_snapper, Macroclemys_temmincki\nmud_turtle\nmusk_turtle, stinkpot\nterrapin\ndiamondback_terrapin, Malaclemys_centrata\nred-bellied_terrapin, red-bellied_turtle, redbelly, Pseudemys_rubriventris\nslider, yellow-bellied_terrapin, 
Pseudemys_scripta\ncooter, river_cooter, Pseudemys_concinna\nbox_turtle, box_tortoise\nWestern_box_turtle, Terrapene_ornata\npainted_turtle, painted_terrapin, painted_tortoise, Chrysemys_picta\ntortoise\nEuropean_tortoise, Testudo_graeca\ngiant_tortoise\ngopher_tortoise, gopher_turtle, gopher, Gopherus_polypemus\ndesert_tortoise, Gopherus_agassizii\nTexas_tortoise\nsoft-shelled_turtle, pancake_turtle\nspiny_softshell, Trionyx_spiniferus\nsmooth_softshell, Trionyx_muticus\ntuatara, Sphenodon_punctatum\nsaurian\nlizard\ngecko\nflying_gecko, fringed_gecko, Ptychozoon_homalocephalum\nbanded_gecko\niguanid, iguanid_lizard\ncommon_iguana, iguana, Iguana_iguana\nmarine_iguana, Amblyrhynchus_cristatus\ndesert_iguana, Dipsosaurus_dorsalis\nchuckwalla, Sauromalus_obesus\nzebra-tailed_lizard, gridiron-tailed_lizard, Callisaurus_draconoides\nfringe-toed_lizard, Uma_notata\nearless_lizard\ncollared_lizard\nleopard_lizard\nspiny_lizard\nfence_lizard\nwestern_fence_lizard, swift, blue-belly, Sceloporus_occidentalis\neastern_fence_lizard, pine_lizard, Sceloporus_undulatus\nsagebrush_lizard, Sceloporus_graciosus\nside-blotched_lizard, sand_lizard, Uta_stansburiana\ntree_lizard, Urosaurus_ornatus\nhorned_lizard, horned_toad, horny_frog\nTexas_horned_lizard, Phrynosoma_cornutum\nbasilisk\nAmerican_chameleon, anole, Anolis_carolinensis\nworm_lizard\nnight_lizard\nskink, scincid, scincid_lizard\nwestern_skink, Eumeces_skiltonianus\nmountain_skink, Eumeces_callicephalus\nteiid_lizard, teiid\nwhiptail, whiptail_lizard\nracerunner, race_runner, six-lined_racerunner, Cnemidophorus_sexlineatus\nplateau_striped_whiptail, Cnemidophorus_velox\nChihuahuan_spotted_whiptail, Cnemidophorus_exsanguis\nwestern_whiptail, Cnemidophorus_tigris\ncheckered_whiptail, Cnemidophorus_tesselatus\nteju\ncaiman_lizard\nagamid, agamid_lizard\nagama\nfrilled_lizard, Chlamydosaurus_kingi\nmoloch\nmountain_devil, spiny_lizard, Moloch_horridus\nanguid_lizard\nalligator_lizard\nblindworm, slowworm, 
Anguis_fragilis\nglass_lizard, glass_snake, joint_snake\nlegless_lizard\nLanthanotus_borneensis\nvenomous_lizard\nGila_monster, Heloderma_suspectum\nbeaded_lizard, Mexican_beaded_lizard, Heloderma_horridum\nlacertid_lizard, lacertid\nsand_lizard, Lacerta_agilis\ngreen_lizard, Lacerta_viridis\nchameleon, chamaeleon\nAfrican_chameleon, Chamaeleo_chamaeleon\nhorned_chameleon, Chamaeleo_oweni\nmonitor, monitor_lizard, varan\nAfrican_monitor, Varanus_niloticus\nKomodo_dragon, Komodo_lizard, dragon_lizard, giant_lizard, Varanus_komodoensis\ncrocodilian_reptile, crocodilian\ncrocodile\nAfrican_crocodile, Nile_crocodile, Crocodylus_niloticus\nAsian_crocodile, Crocodylus_porosus\nMorlett's_crocodile\nfalse_gavial, Tomistoma_schlegeli\nalligator, gator\nAmerican_alligator, Alligator_mississipiensis\nChinese_alligator, Alligator_sinensis\ncaiman, cayman\nspectacled_caiman, Caiman_sclerops\ngavial, Gavialis_gangeticus\narmored_dinosaur\nstegosaur, stegosaurus, Stegosaur_stenops\nankylosaur, ankylosaurus\nEdmontonia\nbone-headed_dinosaur\npachycephalosaur, pachycephalosaurus\nceratopsian, horned_dinosaur\nprotoceratops\ntriceratops\nstyracosaur, styracosaurus\npsittacosaur, psittacosaurus\nornithopod, ornithopod_dinosaur\nhadrosaur, hadrosaurus, duck-billed_dinosaur\ntrachodon, trachodont\nsaurischian, saurischian_dinosaur\nsauropod, sauropod_dinosaur\napatosaur, apatosaurus, brontosaur, brontosaurus, thunder_lizard, Apatosaurus_excelsus\nbarosaur, barosaurus\ndiplodocus\nargentinosaur\ntheropod, theropod_dinosaur, bird-footed_dinosaur\nceratosaur, ceratosaurus\ncoelophysis\ntyrannosaur, tyrannosaurus, Tyrannosaurus_rex\nallosaur, allosaurus\nornithomimid\nmaniraptor\noviraptorid\nvelociraptor\ndeinonychus\nutahraptor, superslasher\nsynapsid, synapsid_reptile\ndicynodont\npelycosaur\ndimetrodon\npterosaur, flying_reptile\npterodactyl\nichthyosaur\nichthyosaurus\nstenopterygius, Stenopterygius_quadrisicissus\nplesiosaur, plesiosaurus\nnothosaur\nsnake, serpent, 
ophidian\ncolubrid_snake, colubrid\nhoop_snake\nthunder_snake, worm_snake, Carphophis_amoenus\nringneck_snake, ring-necked_snake, ring_snake\nhognose_snake, puff_adder, sand_viper\nleaf-nosed_snake\ngreen_snake, grass_snake\nsmooth_green_snake, Opheodrys_vernalis\nrough_green_snake, Opheodrys_aestivus\ngreen_snake\nracer\nblacksnake, black_racer, Coluber_constrictor\nblue_racer, Coluber_constrictor_flaviventris\nhorseshoe_whipsnake, Coluber_hippocrepis\nwhip-snake, whip_snake, whipsnake\ncoachwhip, coachwhip_snake, Masticophis_flagellum\nCalifornia_whipsnake, striped_racer, Masticophis_lateralis\nSonoran_whipsnake, Masticophis_bilineatus\nrat_snake\ncorn_snake, red_rat_snake, Elaphe_guttata\nblack_rat_snake, blacksnake, pilot_blacksnake, mountain_blacksnake, Elaphe_obsoleta\nchicken_snake\nIndian_rat_snake, Ptyas_mucosus\nglossy_snake, Arizona_elegans\nbull_snake, bull-snake\ngopher_snake, Pituophis_melanoleucus\npine_snake\nking_snake, kingsnake\ncommon_kingsnake, Lampropeltis_getulus\nmilk_snake, house_snake, milk_adder, checkered_adder, Lampropeltis_triangulum\ngarter_snake, grass_snake\ncommon_garter_snake, Thamnophis_sirtalis\nribbon_snake, Thamnophis_sauritus\nWestern_ribbon_snake, Thamnophis_proximus\nlined_snake, Tropidoclonion_lineatum\nground_snake, Sonora_semiannulata\neastern_ground_snake, Potamophis_striatula, Haldea_striatula\nwater_snake\ncommon_water_snake, banded_water_snake, Natrix_sipedon, Nerodia_sipedon\nwater_moccasin\ngrass_snake, ring_snake, ringed_snake, Natrix_natrix\nviperine_grass_snake, Natrix_maura\nred-bellied_snake, Storeria_occipitamaculata\nsand_snake\nbanded_sand_snake, Chilomeniscus_cinctus\nblack-headed_snake\nvine_snake\nlyre_snake\nSonoran_lyre_snake, Trimorphodon_lambda\nnight_snake, Hypsiglena_torquata\nblind_snake, worm_snake\nwestern_blind_snake, Leptotyphlops_humilis\nindigo_snake, gopher_snake, Drymarchon_corais\neastern_indigo_snake, Drymarchon_corais_couperi\nconstrictor\nboa\nboa_constrictor, 
Constrictor_constrictor\nrubber_boa, tow-headed_snake, Charina_bottae\nrosy_boa, Lichanura_trivirgata\nanaconda, Eunectes_murinus\npython\ncarpet_snake, Python_variegatus, Morelia_spilotes_variegatus\nreticulated_python, Python_reticulatus\nIndian_python, Python_molurus\nrock_python, rock_snake, Python_sebae\namethystine_python\nelapid, elapid_snake\ncoral_snake, harlequin-snake, New_World_coral_snake\neastern_coral_snake, Micrurus_fulvius\nwestern_coral_snake, Micruroides_euryxanthus\ncoral_snake, Old_World_coral_snake\nAfrican_coral_snake, Aspidelaps_lubricus\nAustralian_coral_snake, Rhynchoelaps_australis\ncopperhead, Denisonia_superba\ncobra\nIndian_cobra, Naja_naja\nasp, Egyptian_cobra, Naja_haje\nblack-necked_cobra, spitting_cobra, Naja_nigricollis\nhamadryad, king_cobra, Ophiophagus_hannah, Naja_hannah\nringhals, rinkhals, spitting_snake, Hemachatus_haemachatus\nmamba\nblack_mamba, Dendroaspis_augusticeps\ngreen_mamba\ndeath_adder, Acanthophis_antarcticus\ntiger_snake, Notechis_scutatus\nAustralian_blacksnake, Pseudechis_porphyriacus\nkrait\nbanded_krait, banded_adder, Bungarus_fasciatus\ntaipan, Oxyuranus_scutellatus\nsea_snake\nviper\nadder, common_viper, Vipera_berus\nasp, asp_viper, Vipera_aspis\npuff_adder, Bitis_arietans\ngaboon_viper, Bitis_gabonica\nhorned_viper, cerastes, sand_viper, horned_asp, Cerastes_cornutus\npit_viper\ncopperhead, Agkistrodon_contortrix\nwater_moccasin, cottonmouth, cottonmouth_moccasin, Agkistrodon_piscivorus\nrattlesnake, rattler\ndiamondback, diamondback_rattlesnake, Crotalus_adamanteus\ntimber_rattlesnake, banded_rattlesnake, Crotalus_horridus_horridus\ncanebrake_rattlesnake, canebrake_rattler, Crotalus_horridus_atricaudatus\nprairie_rattlesnake, prairie_rattler, Western_rattlesnake, Crotalus_viridis\nsidewinder, horned_rattlesnake, Crotalus_cerastes\nWestern_diamondback, Western_diamondback_rattlesnake, Crotalus_atrox\nrock_rattlesnake, Crotalus_lepidus\ntiger_rattlesnake, Crotalus_tigris\nMojave_rattlesnake, 
Crotalus_scutulatus\nspeckled_rattlesnake, Crotalus_mitchellii\nmassasauga, massasauga_rattler, Sistrurus_catenatus\nground_rattler, massasauga, Sistrurus_miliaris\nfer-de-lance, Bothrops_atrops\ncarcase, carcass\ncarrion\narthropod\ntrilobite\narachnid, arachnoid\nharvestman, daddy_longlegs, Phalangium_opilio\nscorpion\nfalse_scorpion, pseudoscorpion\nbook_scorpion, Chelifer_cancroides\nwhip-scorpion, whip_scorpion\nvinegarroon, Mastigoproctus_giganteus\nspider\norb-weaving_spider\nblack_and_gold_garden_spider, Argiope_aurantia\nbarn_spider, Araneus_cavaticus\ngarden_spider, Aranea_diademata\ncomb-footed_spider, theridiid\nblack_widow, Latrodectus_mactans\ntarantula\nwolf_spider, hunting_spider\nEuropean_wolf_spider, tarantula, Lycosa_tarentula\ntrap-door_spider\nacarine\ntick\nhard_tick, ixodid\nIxodes_dammini, deer_tick\nIxodes_neotomae\nIxodes_pacificus, western_black-legged_tick\nIxodes_scapularis, black-legged_tick\nsheep-tick, sheep_tick, Ixodes_ricinus\nIxodes_persulcatus\nIxodes_dentatus\nIxodes_spinipalpis\nwood_tick, American_dog_tick, Dermacentor_variabilis\nsoft_tick, argasid\nmite\nweb-spinning_mite\nacarid\ntrombidiid\ntrombiculid\nharvest_mite, chigger, jigger, redbug\nacarus, genus_Acarus\nitch_mite, sarcoptid\nrust_mite\nspider_mite, tetranychid\nred_spider, red_spider_mite, Panonychus_ulmi\nmyriapod\ngarden_centipede, garden_symphilid, symphilid, Scutigerella_immaculata\ntardigrade\ncentipede\nhouse_centipede, Scutigera_coleoptrata\nmillipede, millepede, milliped\nsea_spider, pycnogonid\nMerostomata, class_Merostomata\nhorseshoe_crab, king_crab, Limulus_polyphemus, Xiphosurus_polyphemus\nAsian_horseshoe_crab\neurypterid\ntongue_worm, pentastomid\ngallinaceous_bird, gallinacean\ndomestic_fowl, fowl, poultry\nDorking\nPlymouth_Rock\nCornish, Cornish_fowl\nRock_Cornish\ngame_fowl\ncochin, cochin_china\njungle_fowl, gallina\njungle_cock\njungle_hen\nred_jungle_fowl, Gallus_gallus\nchicken, Gallus_gallus\nbantam\nchick, biddy\ncock, 
rooster\ncockerel\ncapon\nhen, biddy\ncackler\nbrood_hen, broody, broody_hen, setting_hen, sitter\nmother_hen\nlayer\npullet\nspring_chicken\nRhode_Island_red\nDominique, Dominick\nOrpington\nturkey, Meleagris_gallopavo\nturkey_cock, gobbler, tom, tom_turkey\nocellated_turkey, Agriocharis_ocellata\ngrouse\nblack_grouse\nEuropean_black_grouse, heathfowl, Lyrurus_tetrix\nAsian_black_grouse, Lyrurus_mlokosiewiczi\nblackcock, black_cock\ngreyhen, grayhen, grey_hen, gray_hen, heath_hen\nptarmigan\nred_grouse, moorfowl, moorbird, moor-bird, moorgame, Lagopus_scoticus\nmoorhen\ncapercaillie, capercailzie, horse_of_the_wood, Tetrao_urogallus\nspruce_grouse, Canachites_canadensis\nsage_grouse, sage_hen, Centrocercus_urophasianus\nruffed_grouse, partridge, Bonasa_umbellus\nsharp-tailed_grouse, sprigtail, sprig_tail, Pedioecetes_phasianellus\nprairie_chicken, prairie_grouse, prairie_fowl\ngreater_prairie_chicken, Tympanuchus_cupido\nlesser_prairie_chicken, Tympanuchus_pallidicinctus\nheath_hen, Tympanuchus_cupido_cupido\nguan\ncurassow\npiping_guan\nchachalaca\nTexas_chachalaca, Ortilis_vetula_macalli\nmegapode, mound_bird, mound-bird, mound_builder, scrub_fowl\nmallee_fowl, leipoa, lowan, Leipoa_ocellata\nmallee_hen\nbrush_turkey, Alectura_lathami\nmaleo, Macrocephalon_maleo\nphasianid\npheasant\nring-necked_pheasant, Phasianus_colchicus\nafropavo, Congo_peafowl, Afropavo_congensis\nargus, argus_pheasant\ngolden_pheasant, Chrysolophus_pictus\nbobwhite, bobwhite_quail, partridge\nnorthern_bobwhite, Colinus_virginianus\nOld_World_quail\nmigratory_quail, Coturnix_coturnix, Coturnix_communis\nmonal, monaul\npeafowl, bird_of_Juno\npeachick, pea-chick\npeacock\npeahen\nblue_peafowl, Pavo_cristatus\ngreen_peafowl, Pavo_muticus\nquail\nCalifornia_quail, Lofortyx_californicus\ntragopan\npartridge\nHungarian_partridge, grey_partridge, gray_partridge, Perdix_perdix\nred-legged_partridge, Alectoris_ruffa\nGreek_partridge, rock_partridge, Alectoris_graeca\nmountain_quail, 
mountain_partridge, Oreortyx_picta_palmeri\nguinea_fowl, guinea, Numida_meleagris\nguinea_hen\nhoatzin, hoactzin, stinkbird, Opisthocomus_hoazin\ntinamou, partridge\ncolumbiform_bird\ndodo, Raphus_cucullatus\npigeon\npouter_pigeon, pouter\ndove\nrock_dove, rock_pigeon, Columba_livia\nband-tailed_pigeon, band-tail_pigeon, bandtail, Columba_fasciata\nwood_pigeon, ringdove, cushat, Columba_palumbus\nturtledove\nStreptopelia_turtur\nringdove, Streptopelia_risoria\nAustralian_turtledove, turtledove, Stictopelia_cuneata\nmourning_dove, Zenaidura_macroura\ndomestic_pigeon\nsquab\nfairy_swallow\nroller, tumbler, tumbler_pigeon\nhoming_pigeon, homer\ncarrier_pigeon\npassenger_pigeon, Ectopistes_migratorius\nsandgrouse, sand_grouse\npainted_sandgrouse, Pterocles_indicus\npin-tailed_sandgrouse, pin-tailed_grouse, Pterocles_alchata\npallas's_sandgrouse, Syrrhaptes_paradoxus\nparrot\npopinjay\npoll, poll_parrot\nAfrican_grey, African_gray, Psittacus_erithacus\namazon\nmacaw\nkea, Nestor_notabilis\ncockatoo\nsulphur-crested_cockatoo, Kakatoe_galerita, Cacatua_galerita\npink_cockatoo, Kakatoe_leadbeateri\ncockateel, cockatiel, cockatoo_parrot, Nymphicus_hollandicus\nlovebird\nlory\nlorikeet\nvaried_Lorikeet, Glossopsitta_versicolor\nrainbow_lorikeet, Trichoglossus_moluccanus\nparakeet, parrakeet, parroket, paraquet, paroquet, parroquet\nCarolina_parakeet, Conuropsis_carolinensis\nbudgerigar, budgereegah, budgerygah, budgie, grass_parakeet, lovebird, shell_parakeet, Melopsittacus_undulatus\nring-necked_parakeet, Psittacula_krameri\ncuculiform_bird\ncuckoo\nEuropean_cuckoo, Cuculus_canorus\nblack-billed_cuckoo, Coccyzus_erythropthalmus\nroadrunner, chaparral_cock, Geococcyx_californianus\nani\ncoucal\ncrow_pheasant, Centropus_sinensis\ntouraco, turaco, turacou, turakoo\ncoraciiform_bird\nroller\nEuropean_roller, Coracias_garrulus\nground_roller\nkingfisher\nEurasian_kingfisher, Alcedo_atthis\nbelted_kingfisher, Ceryle_alcyon\nkookaburra, laughing_jackass, 
Dacelo_gigas\nbee_eater\nhornbill\nhoopoe, hoopoo\nEuopean_hoopoe, Upupa_epops\nwood_hoopoe\nmotmot, momot\ntody\napodiform_bird\nswift\nEuropean_swift, Apus_apus\nchimney_swift, chimney_swallow, Chateura_pelagica\nswiftlet, Collocalia_inexpectata\ntree_swift, crested_swift\nhummingbird\nArchilochus_colubris\nthornbill\ngoatsucker, nightjar, caprimulgid\nEuropean_goatsucker, European_nightjar, Caprimulgus_europaeus\nchuck-will's-widow, Caprimulgus_carolinensis\nwhippoorwill, Caprimulgus_vociferus\npoorwill, Phalaenoptilus_nuttallii\nfrogmouth\noilbird, guacharo, Steatornis_caripensis\npiciform_bird\nwoodpecker, peckerwood, pecker\ngreen_woodpecker, Picus_viridis\ndowny_woodpecker\nflicker\nyellow-shafted_flicker, Colaptes_auratus, yellowhammer\ngilded_flicker, Colaptes_chrysoides\nred-shafted_flicker, Colaptes_caper_collaris\nivorybill, ivory-billed_woodpecker, Campephilus_principalis\nredheaded_woodpecker, redhead, Melanerpes_erythrocephalus\nsapsucker\nyellow-bellied_sapsucker, Sphyrapicus_varius\nred-breasted_sapsucker, Sphyrapicus_varius_ruber\nwryneck\npiculet\nbarbet\npuffbird\nhoney_guide\njacamar\ntoucan\ntoucanet\ntrogon\nquetzal, quetzal_bird\nresplendent_quetzel, resplendent_trogon, Pharomacrus_mocino\naquatic_bird\nwaterfowl, water_bird, waterbird\nanseriform_bird\nduck\ndrake\nquack-quack\nduckling\ndiving_duck\ndabbling_duck, dabbler\nmallard, Anas_platyrhynchos\nblack_duck, Anas_rubripes\nteal\ngreenwing, green-winged_teal, Anas_crecca\nbluewing, blue-winged_teal, Anas_discors\ngarganey, Anas_querquedula\nwidgeon, wigeon, Anas_penelope\nAmerican_widgeon, baldpate, Anas_americana\nshoveler, shoveller, broadbill, Anas_clypeata\npintail, pin-tailed_duck, Anas_acuta\nsheldrake\nshelduck\nruddy_duck, Oxyura_jamaicensis\nbufflehead, butterball, dipper, Bucephela_albeola\ngoldeneye, whistler, Bucephela_clangula\nBarrow's_goldeneye, Bucephala_islandica\ncanvasback, canvasback_duck, Aythya_valisineria\npochard, Aythya_ferina\nredhead, Aythya_americana\nscaup, 
scaup_duck, bluebill, broadbill\ngreater_scaup, Aythya_marila\nlesser_scaup, lesser_scaup_duck, lake_duck, Aythya_affinis\nwild_duck\nwood_duck, summer_duck, wood_widgeon, Aix_sponsa\nwood_drake\nmandarin_duck, Aix_galericulata\nmuscovy_duck, musk_duck, Cairina_moschata\nsea_duck\neider, eider_duck\nscoter, scooter\ncommon_scoter, Melanitta_nigra\nold_squaw, oldwife, Clangula_hyemalis\nmerganser, fish_duck, sawbill, sheldrake\ngoosander, Mergus_merganser\nAmerican_merganser, Mergus_merganser_americanus\nred-breasted_merganser, Mergus_serrator\nsmew, Mergus_albellus\nhooded_merganser, hooded_sheldrake, Lophodytes_cucullatus\ngoose\ngosling\ngander\nChinese_goose, Anser_cygnoides\ngreylag, graylag, greylag_goose, graylag_goose, Anser_anser\nblue_goose, Chen_caerulescens\nsnow_goose\nbrant, brant_goose, brent, brent_goose\ncommon_brant_goose, Branta_bernicla\nhonker, Canada_goose, Canadian_goose, Branta_canadensis\nbarnacle_goose, barnacle, Branta_leucopsis\ncoscoroba\nswan\ncob\npen\ncygnet\nmute_swan, Cygnus_olor\nwhooper, whooper_swan, Cygnus_cygnus\ntundra_swan, Cygnus_columbianus\nwhistling_swan, Cygnus_columbianus_columbianus\nBewick's_swan, Cygnus_columbianus_bewickii\ntrumpeter, trumpeter_swan, Cygnus_buccinator\nblack_swan, Cygnus_atratus\nscreamer\nhorned_screamer, Anhima_cornuta\ncrested_screamer\nchaja, Chauna_torquata\nmammal, mammalian\nfemale_mammal\ntusker\nprototherian\nmonotreme, egg-laying_mammal\nechidna, spiny_anteater, anteater\nechidna, spiny_anteater, anteater\nplatypus, duckbill, duckbilled_platypus, duck-billed_platypus, Ornithorhynchus_anatinus\nmarsupial, pouched_mammal\nopossum, possum\ncommon_opossum, Didelphis_virginiana, Didelphis_marsupialis\ncrab-eating_opossum\nopossum_rat\nbandicoot\nrabbit-eared_bandicoot, rabbit_bandicoot, bilby, Macrotis_lagotis\nkangaroo\ngiant_kangaroo, great_grey_kangaroo, Macropus_giganteus\nwallaby, brush_kangaroo\ncommon_wallaby, Macropus_agiles\nhare_wallaby, kangaroo_hare\nnail-tailed_wallaby, 
nail-tailed_kangaroo\nrock_wallaby, rock_kangaroo\npademelon, paddymelon\ntree_wallaby, tree_kangaroo\nmusk_kangaroo, Hypsiprymnodon_moschatus\nrat_kangaroo, kangaroo_rat\npotoroo\nbettong\njerboa_kangaroo, kangaroo_jerboa\nphalanger, opossum, possum\ncuscus\nbrush-tailed_phalanger, Trichosurus_vulpecula\nflying_phalanger, flying_opossum, flying_squirrel\nkoala, koala_bear, kangaroo_bear, native_bear, Phascolarctos_cinereus\nwombat\ndasyurid_marsupial, dasyurid\ndasyure\neastern_dasyure, Dasyurus_quoll\nnative_cat, Dasyurus_viverrinus\nthylacine, Tasmanian_wolf, Tasmanian_tiger, Thylacinus_cynocephalus\nTasmanian_devil, ursine_dasyure, Sarcophilus_hariisi\npouched_mouse, marsupial_mouse, marsupial_rat\nnumbat, banded_anteater, anteater, Myrmecobius_fasciatus\npouched_mole, marsupial_mole, Notoryctus_typhlops\nplacental, placental_mammal, eutherian, eutherian_mammal\nlivestock, stock, farm_animal\nbull\ncow\ncalf\ncalf\nyearling\nbuck\ndoe\ninsectivore\nmole\nstarnose_mole, star-nosed_mole, Condylura_cristata\nbrewer's_mole, hair-tailed_mole, Parascalops_breweri\ngolden_mole\nshrew_mole\nAsiatic_shrew_mole, Uropsilus_soricipes\nAmerican_shrew_mole, Neurotrichus_gibbsii\nshrew, shrewmouse\ncommon_shrew, Sorex_araneus\nmasked_shrew, Sorex_cinereus\nshort-tailed_shrew, Blarina_brevicauda\nwater_shrew\nAmerican_water_shrew, Sorex_palustris\nEuropean_water_shrew, Neomys_fodiens\nMediterranean_water_shrew, Neomys_anomalus\nleast_shrew, Cryptotis_parva\nhedgehog, Erinaceus_europaeus, Erinaceus_europeaeus\ntenrec, tendrac\ntailless_tenrec, Tenrec_ecaudatus\notter_shrew, potamogale, Potamogale_velox\neiderdown\naftershaft\nsickle_feather\ncontour_feather\nbastard_wing, alula, spurious_wing\nsaddle_hackle, saddle_feather\nencolure\nhair\nsquama\nscute\nsclerite\nplastron\nscallop_shell\noyster_shell\ntheca\ninvertebrate\nsponge, poriferan, parazoan\nchoanocyte, collar_cell\nglass_sponge\nVenus's_flower_basket\nmetazoan\ncoelenterate, cnidarian\nplanula\npolyp\nmedusa, 
medusoid, medusan\njellyfish\nscyphozoan\nChrysaora_quinquecirrha\nhydrozoan, hydroid\nhydra\nsiphonophore\nnanomia\nPortuguese_man-of-war, man-of-war, jellyfish\npraya\napolemia\nanthozoan, actinozoan\nsea_anemone, anemone\nactinia, actinian, actiniarian\nsea_pen\ncoral\ngorgonian, gorgonian_coral\nsea_feather\nsea_fan\nred_coral\nstony_coral, madrepore, madriporian_coral\nbrain_coral\nstaghorn_coral, stag's-horn_coral\nmushroom_coral\nctenophore, comb_jelly\nberoe\nplatyctenean\nsea_gooseberry\nVenus's_girdle, Cestum_veneris\nworm\nhelminth, parasitic_worm\nwoodworm\nwoodborer, borer\nacanthocephalan, spiny-headed_worm\narrowworm, chaetognath\nbladder_worm\nflatworm, platyhelminth\nplanarian, planaria\nfluke, trematode, trematode_worm\ncercaria\nliver_fluke, Fasciola_hepatica\nFasciolopsis_buski\nschistosome, blood_fluke\ntapeworm, cestode\nechinococcus\ntaenia\nribbon_worm, nemertean, nemertine, proboscis_worm\nbeard_worm, pogonophoran\nrotifer\nnematode, nematode_worm, roundworm\ncommon_roundworm, Ascaris_lumbricoides\nchicken_roundworm, Ascaridia_galli\npinworm, threadworm, Enterobius_vermicularis\neelworm\nvinegar_eel, vinegar_worm, Anguillula_aceti, Turbatrix_aceti\ntrichina, Trichinella_spiralis\nhookworm\nfilaria\nGuinea_worm, Dracunculus_medinensis\nannelid, annelid_worm, segmented_worm\narchiannelid\noligochaete, oligochaete_worm\nearthworm, angleworm, fishworm, fishing_worm, wiggler, nightwalker, nightcrawler, crawler, dew_worm, red_worm\npolychaete, polychete, polychaete_worm, polychete_worm\nlugworm, lug, lobworm\nsea_mouse\nbloodworm\nleech, bloodsucker, hirudinean\nmedicinal_leech, Hirudo_medicinalis\nhorseleech\nmollusk, mollusc, shellfish\nscaphopod\ntooth_shell, tusk_shell\ngastropod, univalve\nabalone, ear-shell\normer, sea-ear, Haliotis_tuberculata\nscorpion_shell\nconch\ngiant_conch, Strombus_gigas\nsnail\nedible_snail, Helix_pomatia\ngarden_snail\nbrown_snail, Helix_aspersa\nHelix_hortensis\nslug\nseasnail\nneritid, 
neritid_gastropod\nnerita\nbleeding_tooth, Nerita_peloronta\nneritina\nwhelk\nmoon_shell, moonshell\nperiwinkle, winkle\nlimpet\ncommon_limpet, Patella_vulgata\nkeyhole_limpet, Fissurella_apertura, Diodora_apertura\nriver_limpet, freshwater_limpet, Ancylus_fluviatilis\nsea_slug, nudibranch\nsea_hare, Aplysia_punctata\nHermissenda_crassicornis\nbubble_shell\nphysa\ncowrie, cowry\nmoney_cowrie, Cypraea_moneta\ntiger_cowrie, Cypraea_tigris\nsolenogaster, aplacophoran\nchiton, coat-of-mail_shell, sea_cradle, polyplacophore\nbivalve, pelecypod, lamellibranch\nspat\nclam\nseashell\nsoft-shell_clam, steamer, steamer_clam, long-neck_clam, Mya_arenaria\nquahog, quahaug, hard-shell_clam, hard_clam, round_clam, Venus_mercenaria, Mercenaria_mercenaria\nlittleneck, littleneck_clam\ncherrystone, cherrystone_clam\ngeoduck\nrazor_clam, jackknife_clam, knife-handle\ngiant_clam, Tridacna_gigas\ncockle\nedible_cockle, Cardium_edule\noyster\nJapanese_oyster, Ostrea_gigas\nVirginia_oyster\npearl_oyster, Pinctada_margaritifera\nsaddle_oyster, Anomia_ephippium\nwindow_oyster, windowpane_oyster, capiz, Placuna_placenta\nark_shell\nblood_clam\nmussel\nmarine_mussel, mytilid\nedible_mussel, Mytilus_edulis\nfreshwater_mussel, freshwater_clam\npearly-shelled_mussel\nthin-shelled_mussel\nzebra_mussel, Dreissena_polymorpha\nscallop, scollop, escallop\nbay_scallop, Pecten_irradians\nsea_scallop, giant_scallop, Pecten_magellanicus\nshipworm, teredinid\nteredo\npiddock\ncephalopod, cephalopod_mollusk\nchambered_nautilus, pearly_nautilus, nautilus\noctopod\noctopus, devilfish\npaper_nautilus, nautilus, Argonaut, Argonauta_argo\ndecapod\nsquid\nloligo\nommastrephes\narchiteuthis, giant_squid\ncuttlefish, cuttle\nspirula, Spirula_peronii\ncrustacean\nmalacostracan_crustacean\ndecapod_crustacean, decapod\nbrachyuran\ncrab\nstone_crab, Menippe_mercenaria\nhard-shell_crab\nsoft-shell_crab, soft-shelled_crab\nDungeness_crab, Cancer_magister\nrock_crab, Cancer_irroratus\nJonah_crab, 
Cancer_borealis\nswimming_crab\nEnglish_lady_crab, Portunus_puber\nAmerican_lady_crab, lady_crab, calico_crab, Ovalipes_ocellatus\nblue_crab, Callinectes_sapidus\nfiddler_crab\npea_crab\nking_crab, Alaska_crab, Alaskan_king_crab, Alaska_king_crab, Paralithodes_camtschatica\nspider_crab\nEuropean_spider_crab, king_crab, Maja_squinado\ngiant_crab, Macrocheira_kaempferi\nlobster\ntrue_lobster\nAmerican_lobster, Northern_lobster, Maine_lobster, Homarus_americanus\nEuropean_lobster, Homarus_vulgaris\nCape_lobster, Homarus_capensis\nNorway_lobster, Nephrops_norvegicus\nspiny_lobster, langouste, rock_lobster, crawfish, crayfish, sea_crawfish\ncrayfish, crawfish, crawdad, crawdaddy\nOld_World_crayfish, ecrevisse\nAmerican_crayfish\nhermit_crab\nshrimp\nsnapping_shrimp, pistol_shrimp\nprawn\nlong-clawed_prawn, river_prawn, Palaemon_australis\ntropical_prawn\nkrill\nEuphausia_pacifica\nopossum_shrimp\nstomatopod, stomatopod_crustacean\nmantis_shrimp, mantis_crab\nsquilla, mantis_prawn\nisopod\nwoodlouse, slater\npill_bug\nsow_bug\nsea_louse, sea_slater\namphipod\nskeleton_shrimp\nwhale_louse\ndaphnia, water_flea\nfairy_shrimp\nbrine_shrimp, Artemia_salina\ntadpole_shrimp\ncopepod, copepod_crustacean\ncyclops, water_flea\nseed_shrimp, mussel_shrimp, ostracod\nbarnacle, cirriped, cirripede\nacorn_barnacle, rock_barnacle, Balanus_balanoides\ngoose_barnacle, gooseneck_barnacle, Lepas_fascicularis\nonychophoran, velvet_worm, peripatus\nwading_bird, wader\nstork\nwhite_stork, Ciconia_ciconia\nblack_stork, Ciconia_nigra\nadjutant_bird, adjutant, adjutant_stork, Leptoptilus_dubius\nmarabou, marabout, marabou_stork, Leptoptilus_crumeniferus\nopenbill\njabiru, Jabiru_mycteria\nsaddlebill, jabiru, Ephippiorhynchus_senegalensis\npoliceman_bird, black-necked_stork, jabiru, Xenorhyncus_asiaticus\nwood_ibis, wood_stork, flinthead, Mycteria_americana\nshoebill, shoebird, Balaeniceps_rex\nibis\nwood_ibis, wood_stork, Ibis_ibis\nsacred_ibis, 
Threskiornis_aethiopica\nspoonbill\ncommon_spoonbill, Platalea_leucorodia\nroseate_spoonbill, Ajaia_ajaja\nflamingo\nheron\ngreat_blue_heron, Ardea_herodius\ngreat_white_heron, Ardea_occidentalis\negret\nlittle_blue_heron, Egretta_caerulea\nsnowy_egret, snowy_heron, Egretta_thula\nlittle_egret, Egretta_garzetta\ngreat_white_heron, Casmerodius_albus\nAmerican_egret, great_white_heron, Egretta_albus\ncattle_egret, Bubulcus_ibis\nnight_heron, night_raven\nblack-crowned_night_heron, Nycticorax_nycticorax\nyellow-crowned_night_heron, Nyctanassa_violacea\nboatbill, boat-billed_heron, broadbill, Cochlearius_cochlearius\nbittern\nAmerican_bittern, stake_driver, Botaurus_lentiginosus\nEuropean_bittern, Botaurus_stellaris\nleast_bittern, Ixobrychus_exilis\ncrane\nwhooping_crane, whooper, Grus_americana\ncourlan, Aramus_guarauna\nlimpkin, Aramus_pictus\ncrested_cariama, seriema, Cariama_cristata\nchunga, seriema, Chunga_burmeisteri\nrail\nweka, maori_hen, wood_hen\ncrake\ncorncrake, land_rail, Crex_crex\nspotted_crake, Porzana_porzana\ngallinule, marsh_hen, water_hen, swamphen\nFlorida_gallinule, Gallinula_chloropus_cachinnans\nmoorhen, Gallinula_chloropus\npurple_gallinule\nEuropean_gallinule, Porphyrio_porphyrio\nAmerican_gallinule, Porphyrula_martinica\nnotornis, takahe, Notornis_mantelli\ncoot\nAmerican_coot, marsh_hen, mud_hen, water_hen, Fulica_americana\nOld_World_coot, Fulica_atra\nbustard\ngreat_bustard, Otis_tarda\nplain_turkey, Choriotis_australis\nbutton_quail, button-quail, bustard_quail, hemipode\nstriped_button_quail, Turnix_sylvatica\nplain_wanderer, Pedionomus_torquatus\ntrumpeter\nBrazilian_trumpeter, Psophia_crepitans\nseabird, sea_bird, seafowl\nshorebird, shore_bird, limicoline_bird\nplover\npiping_plover, Charadrius_melodus\nkilldeer, kildeer, killdeer_plover, Charadrius_vociferus\ndotterel, dotrel, Charadrius_morinellus, Eudromias_morinellus\ngolden_plover\nlapwing, green_plover, peewit, pewit\nturnstone\nruddy_turnstone, 
Arenaria_interpres\nblack_turnstone, Arenaria-Melanocephala\nsandpiper\nsurfbird, Aphriza_virgata\nEuropean_sandpiper, Actitis_hypoleucos\nspotted_sandpiper, Actitis_macularia\nleast_sandpiper, stint, Erolia_minutilla\nred-backed_sandpiper, dunlin, Erolia_alpina\ngreenshank, Tringa_nebularia\nredshank, Tringa_totanus\nyellowlegs\ngreater_yellowlegs, Tringa_melanoleuca\nlesser_yellowlegs, Tringa_flavipes\npectoral_sandpiper, jacksnipe, Calidris_melanotos\nknot, greyback, grayback, Calidris_canutus\ncurlew_sandpiper, Calidris_Ferruginea\nsanderling, Crocethia_alba\nupland_sandpiper, upland_plover, Bartramian_sandpiper, Bartramia_longicauda\nruff, Philomachus_pugnax\nreeve\ntattler\nPolynesian_tattler, Heteroscelus_incanus\nwillet, Catoptrophorus_semipalmatus\nwoodcock\nEurasian_woodcock, Scolopax_rusticola\nAmerican_woodcock, woodcock_snipe, Philohela_minor\nsnipe\nwhole_snipe, Gallinago_gallinago\nWilson's_snipe, Gallinago_gallinago_delicata\ngreat_snipe, woodcock_snipe, Gallinago_media\njacksnipe, half_snipe, Limnocryptes_minima\ndowitcher\ngreyback, grayback, Limnodromus_griseus\nred-breasted_snipe, Limnodromus_scolopaceus\ncurlew\nEuropean_curlew, Numenius_arquata\nEskimo_curlew, Numenius_borealis\ngodwit\nHudsonian_godwit, Limosa_haemastica\nstilt, stiltbird, longlegs, long-legs, stilt_plover, Himantopus_stilt\nblack-necked_stilt, Himantopus_mexicanus\nblack-winged_stilt, Himantopus_himantopus\nwhite-headed_stilt, Himantopus_himantopus_leucocephalus\nkaki, Himantopus_novae-zelandiae\nstilt, Australian_stilt\nbanded_stilt, Cladorhyncus_leucocephalum\navocet\noystercatcher, oyster_catcher\nphalarope\nred_phalarope, Phalaropus_fulicarius\nnorthern_phalarope, Lobipes_lobatus\nWilson's_phalarope, Steganopus_tricolor\npratincole, glareole\ncourser\ncream-colored_courser, Cursorius_cursor\ncrocodile_bird, Pluvianus_aegyptius\nstone_curlew, thick-knee, Burhinus_oedicnemus\ncoastal_diving_bird\nlarid\ngull, seagull, sea_gull\nmew, mew_gull, sea_mew, 
Larus_canus\nblack-backed_gull, great_black-backed_gull, cob, Larus_marinus\nherring_gull, Larus_argentatus\nlaughing_gull, blackcap, pewit, pewit_gull, Larus_ridibundus\nivory_gull, Pagophila_eburnea\nkittiwake\ntern\nsea_swallow, Sterna_hirundo\nskimmer\njaeger\nparasitic_jaeger, arctic_skua, Stercorarius_parasiticus\nskua, bonxie\ngreat_skua, Catharacta_skua\nauk\nauklet\nrazorbill, razor-billed_auk, Alca_torda\nlittle_auk, dovekie, Plautus_alle\nguillemot\nblack_guillemot, Cepphus_grylle\npigeon_guillemot, Cepphus_columba\nmurre\ncommon_murre, Uria_aalge\nthick-billed_murre, Uria_lomvia\npuffin\nAtlantic_puffin, Fratercula_arctica\nhorned_puffin, Fratercula_corniculata\ntufted_puffin, Lunda_cirrhata\ngaviiform_seabird\nloon, diver\npodicipitiform_seabird\ngrebe\ngreat_crested_grebe, Podiceps_cristatus\nred-necked_grebe, Podiceps_grisegena\nblack-necked_grebe, eared_grebe, Podiceps_nigricollis\ndabchick, little_grebe, Podiceps_ruficollis\npied-billed_grebe, Podilymbus_podiceps\npelecaniform_seabird\npelican\nwhite_pelican, Pelecanus_erythrorhynchos\nOld_world_white_pelican, Pelecanus_onocrotalus\nfrigate_bird, man-of-war_bird\ngannet\nsolan, solan_goose, solant_goose, Sula_bassana\nbooby\ncormorant, Phalacrocorax_carbo\nsnakebird, anhinga, darter\nwater_turkey, Anhinga_anhinga\ntropic_bird, tropicbird, boatswain_bird\nsphenisciform_seabird\npenguin\nAdelie, Adelie_penguin, Pygoscelis_adeliae\nking_penguin, Aptenodytes_patagonica\nemperor_penguin, Aptenodytes_forsteri\njackass_penguin, Spheniscus_demersus\nrock_hopper, crested_penguin\npelagic_bird, oceanic_bird\nprocellariiform_seabird\nalbatross, mollymawk\nwandering_albatross, Diomedea_exulans\nblack-footed_albatross, gooney, gooney_bird, goonie, goony, Diomedea_nigripes\npetrel\nwhite-chinned_petrel, Procellaria_aequinoctialis\ngiant_petrel, giant_fulmar, Macronectes_giganteus\nfulmar, fulmar_petrel, Fulmarus_glacialis\nshearwater\nManx_shearwater, Puffinus_puffinus\nstorm_petrel\nstormy_petrel, 
northern_storm_petrel, Hydrobates_pelagicus\nMother_Carey's_chicken, Mother_Carey's_hen, Oceanites_oceanicus\ndiving_petrel\naquatic_mammal\ncetacean, cetacean_mammal, blower\nwhale\nbaleen_whale, whalebone_whale\nright_whale\nbowhead, bowhead_whale, Greenland_whale, Balaena_mysticetus\nrorqual, razorback\nblue_whale, sulfur_bottom, Balaenoptera_musculus\nfinback, finback_whale, fin_whale, common_rorqual, Balaenoptera_physalus\nsei_whale, Balaenoptera_borealis\nlesser_rorqual, piked_whale, minke_whale, Balaenoptera_acutorostrata\nhumpback, humpback_whale, Megaptera_novaeangliae\ngrey_whale, gray_whale, devilfish, Eschrichtius_gibbosus, Eschrichtius_robustus\ntoothed_whale\nsperm_whale, cachalot, black_whale, Physeter_catodon\npygmy_sperm_whale, Kogia_breviceps\ndwarf_sperm_whale, Kogia_simus\nbeaked_whale\nbottle-nosed_whale, bottlenose_whale, bottlenose, Hyperoodon_ampullatus\ndolphin\ncommon_dolphin, Delphinus_delphis\nbottlenose_dolphin, bottle-nosed_dolphin, bottlenose\nAtlantic_bottlenose_dolphin, Tursiops_truncatus\nPacific_bottlenose_dolphin, Tursiops_gilli\nporpoise\nharbor_porpoise, herring_hog, Phocoena_phocoena\nvaquita, Phocoena_sinus\ngrampus, Grampus_griseus\nkiller_whale, killer, orca, grampus, sea_wolf, Orcinus_orca\npilot_whale, black_whale, common_blackfish, blackfish, Globicephala_melaena\nriver_dolphin\nnarwhal, narwal, narwhale, Monodon_monoceros\nwhite_whale, beluga, Delphinapterus_leucas\nsea_cow, sirenian_mammal, sirenian\nmanatee, Trichechus_manatus\ndugong, Dugong_dugon\nSteller's_sea_cow, Hydrodamalis_gigas\ncarnivore\nomnivore\npinniped_mammal, pinniped, pinnatiped\nseal\ncrabeater_seal, crab-eating_seal\neared_seal\nfur_seal\nguadalupe_fur_seal, Arctocephalus_philippi\nfur_seal\nAlaska_fur_seal, Callorhinus_ursinus\nsea_lion\nSouth_American_sea_lion, Otaria_Byronia\nCalifornia_sea_lion, Zalophus_californianus, Zalophus_californicus\nAustralian_sea_lion, Zalophus_lobatus\nSteller_sea_lion, Steller's_sea_lion, 
Eumetopias_jubatus\nearless_seal, true_seal, hair_seal\nharbor_seal, common_seal, Phoca_vitulina\nharp_seal, Pagophilus_groenlandicus\nelephant_seal, sea_elephant\nbearded_seal, squareflipper_square_flipper, Erignathus_barbatus\nhooded_seal, bladdernose, Cystophora_cristata\nwalrus, seahorse, sea_horse\nAtlantic_walrus, Odobenus_rosmarus\nPacific_walrus, Odobenus_divergens\nFissipedia\nfissiped_mammal, fissiped\naardvark, ant_bear, anteater, Orycteropus_afer\ncanine, canid\nbitch\nbrood_bitch\ndog, domestic_dog, Canis_familiaris\npooch, doggie, doggy, barker, bow-wow\ncur, mongrel, mutt\nfeist, fice\npariah_dog, pye-dog, pie-dog\nlapdog\ntoy_dog, toy\nChihuahua\nJapanese_spaniel\nMaltese_dog, Maltese_terrier, Maltese\nPekinese, Pekingese, Peke\nShih-Tzu\ntoy_spaniel\nEnglish_toy_spaniel\nBlenheim_spaniel\nKing_Charles_spaniel\npapillon\ntoy_terrier\nhunting_dog\ncourser\nRhodesian_ridgeback\nhound, hound_dog\nAfghan_hound, Afghan\nbasset, basset_hound\nbeagle\nbloodhound, sleuthhound\nbluetick\nboarhound\ncoonhound\ncoondog\nblack-and-tan_coonhound\ndachshund, dachsie, badger_dog\nsausage_dog, sausage_hound\nfoxhound\nAmerican_foxhound\nWalker_hound, Walker_foxhound\nEnglish_foxhound\nharrier\nPlott_hound\nredbone\nwolfhound\nborzoi, Russian_wolfhound\nIrish_wolfhound\ngreyhound\nItalian_greyhound\nwhippet\nIbizan_hound, Ibizan_Podenco\nNorwegian_elkhound, elkhound\notterhound, otter_hound\nSaluki, gazelle_hound\nScottish_deerhound, deerhound\nstaghound\nWeimaraner\nterrier\nbullterrier, bull_terrier\nStaffordshire_bullterrier, Staffordshire_bull_terrier\nAmerican_Staffordshire_terrier, Staffordshire_terrier, American_pit_bull_terrier, pit_bull_terrier\nBedlington_terrier\nBorder_terrier\nKerry_blue_terrier\nIrish_terrier\nNorfolk_terrier\nNorwich_terrier\nYorkshire_terrier\nrat_terrier, ratter\nManchester_terrier, black-and-tan_terrier\ntoy_Manchester, toy_Manchester_terrier\nfox_terrier\nsmooth-haired_fox_terrier\nwire-haired_fox_terrier\nwirehair, 
wirehaired_terrier, wire-haired_terrier\nLakeland_terrier\nWelsh_terrier\nSealyham_terrier, Sealyham\nAiredale, Airedale_terrier\ncairn, cairn_terrier\nAustralian_terrier\nDandie_Dinmont, Dandie_Dinmont_terrier\nBoston_bull, Boston_terrier\nschnauzer\nminiature_schnauzer\ngiant_schnauzer\nstandard_schnauzer\nScotch_terrier, Scottish_terrier, Scottie\nTibetan_terrier, chrysanthemum_dog\nsilky_terrier, Sydney_silky\nSkye_terrier\nClydesdale_terrier\nsoft-coated_wheaten_terrier\nWest_Highland_white_terrier\nLhasa, Lhasa_apso\nsporting_dog, gun_dog\nbird_dog\nwater_dog\nretriever\nflat-coated_retriever\ncurly-coated_retriever\ngolden_retriever\nLabrador_retriever\nChesapeake_Bay_retriever\npointer, Spanish_pointer\nGerman_short-haired_pointer\nsetter\nvizsla, Hungarian_pointer\nEnglish_setter\nIrish_setter, red_setter\nGordon_setter\nspaniel\nBrittany_spaniel\nclumber, clumber_spaniel\nfield_spaniel\nspringer_spaniel, springer\nEnglish_springer, English_springer_spaniel\nWelsh_springer_spaniel\ncocker_spaniel, English_cocker_spaniel, cocker\nSussex_spaniel\nwater_spaniel\nAmerican_water_spaniel\nIrish_water_spaniel\ngriffon, wire-haired_pointing_griffon\nworking_dog\nwatchdog, guard_dog\nkuvasz\nattack_dog\nhousedog\nschipperke\nshepherd_dog, sheepdog, sheep_dog\nBelgian_sheepdog, Belgian_shepherd\ngroenendael\nmalinois\nbriard\nkelpie\nkomondor\nOld_English_sheepdog, bobtail\nShetland_sheepdog, Shetland_sheep_dog, Shetland\ncollie\nBorder_collie\nBouvier_des_Flandres, Bouviers_des_Flandres\nRottweiler\nGerman_shepherd, German_shepherd_dog, German_police_dog, alsatian\npolice_dog\npinscher\nDoberman, Doberman_pinscher\nminiature_pinscher\nSennenhunde\nGreater_Swiss_Mountain_dog\nBernese_mountain_dog\nAppenzeller\nEntleBucher\nboxer\nmastiff\nbull_mastiff\nTibetan_mastiff\nbulldog, English_bulldog\nFrench_bulldog\nGreat_Dane\nguide_dog\nSeeing_Eye_dog\nhearing_dog\nSaint_Bernard, St_Bernard\nseizure-alert_dog\nsled_dog, sledge_dog\nEskimo_dog, husky\nmalamute, malemute, 
Alaskan_malamute\nSiberian_husky\ndalmatian, coach_dog, carriage_dog\nliver-spotted_dalmatian\naffenpinscher, monkey_pinscher, monkey_dog\nbasenji\npug, pug-dog\nLeonberg\nNewfoundland, Newfoundland_dog\nGreat_Pyrenees\nspitz\nSamoyed, Samoyede\nPomeranian\nchow, chow_chow\nkeeshond\ngriffon, Brussels_griffon, Belgian_griffon\nBrabancon_griffon\ncorgi, Welsh_corgi\nPembroke, Pembroke_Welsh_corgi\nCardigan, Cardigan_Welsh_corgi\npoodle, poodle_dog\ntoy_poodle\nminiature_poodle\nstandard_poodle\nlarge_poodle\nMexican_hairless\nwolf\ntimber_wolf, grey_wolf, gray_wolf, Canis_lupus\nwhite_wolf, Arctic_wolf, Canis_lupus_tundrarum\nred_wolf, maned_wolf, Canis_rufus, Canis_niger\ncoyote, prairie_wolf, brush_wolf, Canis_latrans\ncoydog\njackal, Canis_aureus\nwild_dog\ndingo, warrigal, warragal, Canis_dingo\ndhole, Cuon_alpinus\ncrab-eating_dog, crab-eating_fox, Dusicyon_cancrivorus\nraccoon_dog, Nyctereutes_procyonides\nAfrican_hunting_dog, hyena_dog, Cape_hunting_dog, Lycaon_pictus\nhyena, hyaena\nstriped_hyena, Hyaena_hyaena\nbrown_hyena, strand_wolf, Hyaena_brunnea\nspotted_hyena, laughing_hyena, Crocuta_crocuta\naardwolf, Proteles_cristata\nfox\nvixen\nReynard\nred_fox, Vulpes_vulpes\nblack_fox\nsilver_fox\nred_fox, Vulpes_fulva\nkit_fox, prairie_fox, Vulpes_velox\nkit_fox, Vulpes_macrotis\nArctic_fox, white_fox, Alopex_lagopus\nblue_fox\ngrey_fox, gray_fox, Urocyon_cinereoargenteus\nfeline, felid\ncat, true_cat\ndomestic_cat, house_cat, Felis_domesticus, Felis_catus\nkitty, kitty-cat, puss, pussy, pussycat\nmouser\nalley_cat\nstray\ntom, tomcat\ngib\ntabby, queen\nkitten, kitty\ntabby, tabby_cat\ntiger_cat\ntortoiseshell, tortoiseshell-cat, calico_cat\nPersian_cat\nAngora, Angora_cat\nSiamese_cat, Siamese\nblue_point_Siamese\nBurmese_cat\nEgyptian_cat\nMaltese, Maltese_cat\nAbyssinian, Abyssinian_cat\nManx, Manx_cat\nwildcat\nsand_cat\nEuropean_wildcat, catamountain, Felis_silvestris\ncougar, puma, catamount, mountain_lion, painter, panther, Felis_concolor\nocelot, 
panther_cat, Felis_pardalis\njaguarundi, jaguarundi_cat, jaguarondi, eyra, Felis_yagouaroundi\nkaffir_cat, caffer_cat, Felis_ocreata\njungle_cat, Felis_chaus\nserval, Felis_serval\nleopard_cat, Felis_bengalensis\nmargay, margay_cat, Felis_wiedi\nmanul, Pallas's_cat, Felis_manul\nlynx, catamount\ncommon_lynx, Lynx_lynx\nCanada_lynx, Lynx_canadensis\nbobcat, bay_lynx, Lynx_rufus\nspotted_lynx, Lynx_pardina\ncaracal, desert_lynx, Lynx_caracal\nbig_cat, cat\nleopard, Panthera_pardus\nleopardess\npanther\nsnow_leopard, ounce, Panthera_uncia\njaguar, panther, Panthera_onca, Felis_onca\nlion, king_of_beasts, Panthera_leo\nlioness\nlionet\ntiger, Panthera_tigris\nBengal_tiger\ntigress\nliger\ntiglon, tigon\ncheetah, chetah, Acinonyx_jubatus\nsaber-toothed_tiger, sabertooth\nSmiledon_californicus\nbear\nbrown_bear, bruin, Ursus_arctos\nbruin\nSyrian_bear, Ursus_arctos_syriacus\ngrizzly, grizzly_bear, silvertip, silver-tip, Ursus_horribilis, Ursus_arctos_horribilis\nAlaskan_brown_bear, Kodiak_bear, Kodiak, Ursus_middendorffi, Ursus_arctos_middendorffi\nAmerican_black_bear, black_bear, Ursus_americanus, Euarctos_americanus\ncinnamon_bear\nAsiatic_black_bear, black_bear, Ursus_thibetanus, Selenarctos_thibetanus\nice_bear, polar_bear, Ursus_Maritimus, Thalarctos_maritimus\nsloth_bear, Melursus_ursinus, Ursus_ursinus\nviverrine, viverrine_mammal\ncivet, civet_cat\nlarge_civet, Viverra_zibetha\nsmall_civet, Viverricula_indica, Viverricula_malaccensis\nbinturong, bearcat, Arctictis_bintourong\nCryptoprocta, genus_Cryptoprocta\nfossa, fossa_cat, Cryptoprocta_ferox\nfanaloka, Fossa_fossa\ngenet, Genetta_genetta\nbanded_palm_civet, Hemigalus_hardwickii\nmongoose\nIndian_mongoose, Herpestes_nyula\nichneumon, Herpestes_ichneumon\npalm_cat, palm_civet\nmeerkat, mierkat\nslender-tailed_meerkat, Suricata_suricatta\nsuricate, Suricata_tetradactyla\nbat, chiropteran\nfruit_bat, megabat\nflying_fox\nPteropus_capestratus\nPteropus_hypomelanus\nharpy, harpy_bat, tube-nosed_bat, 
tube-nosed_fruit_bat\nCynopterus_sphinx\ncarnivorous_bat, microbat\nmouse-eared_bat\nleafnose_bat, leaf-nosed_bat\nmacrotus, Macrotus_californicus\nspearnose_bat\nPhyllostomus_hastatus\nhognose_bat, Choeronycteris_mexicana\nhorseshoe_bat\nhorseshoe_bat\norange_bat, orange_horseshoe_bat, Rhinonicteris_aurantius\nfalse_vampire, false_vampire_bat\nbig-eared_bat, Megaderma_lyra\nvespertilian_bat, vespertilionid\nfrosted_bat, Vespertilio_murinus\nred_bat, Lasiurus_borealis\nbrown_bat\nlittle_brown_bat, little_brown_myotis, Myotis_leucifugus\ncave_myotis, Myotis_velifer\nbig_brown_bat, Eptesicus_fuscus\nserotine, European_brown_bat, Eptesicus_serotinus\npallid_bat, cave_bat, Antrozous_pallidus\npipistrelle, pipistrel, Pipistrellus_pipistrellus\neastern_pipistrel, Pipistrellus_subflavus\njackass_bat, spotted_bat, Euderma_maculata\nlong-eared_bat\nwestern_big-eared_bat, Plecotus_townsendi\nfreetail, free-tailed_bat, freetailed_bat\nguano_bat, Mexican_freetail_bat, Tadarida_brasiliensis\npocketed_bat, pocketed_freetail_bat, Tadirida_femorosacca\nmastiff_bat\nvampire_bat, true_vampire_bat\nDesmodus_rotundus\nhairy-legged_vampire_bat, Diphylla_ecaudata\npredator, predatory_animal\nprey, quarry\ngame\nbig_game\ngame_bird\nfossorial_mammal\ntetrapod\nquadruped\nhexapod\nbiped\ninsect\nsocial_insect\nholometabola, metabola\ndefoliator\npollinator\ngallfly\nscorpion_fly\nhanging_fly\ncollembolan, springtail\nbeetle\ntiger_beetle\nladybug, ladybeetle, lady_beetle, ladybird, ladybird_beetle\ntwo-spotted_ladybug, Adalia_bipunctata\nMexican_bean_beetle, bean_beetle, Epilachna_varivestis\nHippodamia_convergens\nvedalia, Rodolia_cardinalis\nground_beetle, carabid_beetle\nbombardier_beetle\ncalosoma\nsearcher, searcher_beetle, Calosoma_scrutator\nfirefly, lightning_bug\nglowworm\nlong-horned_beetle, longicorn, longicorn_beetle\nsawyer, sawyer_beetle\npine_sawyer\nleaf_beetle, chrysomelid\nflea_beetle\nColorado_potato_beetle, Colorado_beetle, potato_bug, potato_beetle, 
Leptinotarsa_decemlineata\ncarpet_beetle, carpet_bug\nbuffalo_carpet_beetle, Anthrenus_scrophulariae\nblack_carpet_beetle\nclerid_beetle, clerid\nbee_beetle\nlamellicorn_beetle\nscarabaeid_beetle, scarabaeid, scarabaean\ndung_beetle\nscarab, scarabaeus, Scarabaeus_sacer\ntumblebug\ndorbeetle\nJune_beetle, June_bug, May_bug, May_beetle\ngreen_June_beetle, figeater\nJapanese_beetle, Popillia_japonica\nOriental_beetle, Asiatic_beetle, Anomala_orientalis\nrhinoceros_beetle\nmelolonthid_beetle\ncockchafer, May_bug, May_beetle, Melolontha_melolontha\nrose_chafer, rose_bug, Macrodactylus_subspinosus\nrose_chafer, rose_beetle, Cetonia_aurata\nstag_beetle\nelaterid_beetle, elater, elaterid\nclick_beetle, skipjack, snapping_beetle\nfirefly, fire_beetle, Pyrophorus_noctiluca\nwireworm\nwater_beetle\nwhirligig_beetle\ndeathwatch_beetle, deathwatch, Xestobium_rufovillosum\nweevil\nsnout_beetle\nboll_weevil, Anthonomus_grandis\nblister_beetle, meloid\noil_beetle\nSpanish_fly\nDutch-elm_beetle, Scolytus_multistriatus\nbark_beetle\nspruce_bark_beetle, Dendroctonus_rufipennis\nrove_beetle\ndarkling_beetle, darkling_groung_beetle, tenebrionid\nmealworm\nflour_beetle, flour_weevil\nseed_beetle, seed_weevil\npea_weevil, Bruchus_pisorum\nbean_weevil, Acanthoscelides_obtectus\nrice_weevil, black_weevil, Sitophylus_oryzae\nAsian_longhorned_beetle, Anoplophora_glabripennis\nweb_spinner\nlouse, sucking_louse\ncommon_louse, Pediculus_humanus\nhead_louse, Pediculus_capitis\nbody_louse, cootie, Pediculus_corporis\ncrab_louse, pubic_louse, crab, Phthirius_pubis\nbird_louse, biting_louse, louse\nflea\nPulex_irritans\ndog_flea, Ctenocephalides_canis\ncat_flea, Ctenocephalides_felis\nchigoe, chigger, chigoe_flea, Tunga_penetrans\nsticktight, sticktight_flea, Echidnophaga_gallinacea\ndipterous_insect, two-winged_insects, dipteran, dipteron\ngall_midge, gallfly, gall_gnat\nHessian_fly, Mayetiola_destructor\nfly\nhousefly, house_fly, Musca_domestica\ntsetse_fly, tsetse, tzetze_fly, tzetze, 
glossina\nblowfly, blow_fly\nbluebottle, Calliphora_vicina\ngreenbottle, greenbottle_fly\nflesh_fly, Sarcophaga_carnaria\ntachina_fly\ngadfly\nbotfly\nhuman_botfly, Dermatobia_hominis\nsheep_botfly, sheep_gadfly, Oestrus_ovis\nwarble_fly\nhorsefly, cleg, clegg, horse_fly\nbee_fly\nrobber_fly, bee_killer\nfruit_fly, pomace_fly\napple_maggot, railroad_worm, Rhagoletis_pomonella\nMediterranean_fruit_fly, medfly, Ceratitis_capitata\ndrosophila, Drosophila_melanogaster\nvinegar_fly\nleaf_miner, leaf-miner\nlouse_fly, hippoboscid\nhorse_tick, horsefly, Hippobosca_equina\nsheep_ked, sheep-tick, sheep_tick, Melophagus_Ovinus\nhorn_fly, Haematobia_irritans\nmosquito\nwiggler, wriggler\ngnat\nyellow-fever_mosquito, Aedes_aegypti\nAsian_tiger_mosquito, Aedes_albopictus\nanopheline\nmalarial_mosquito, malaria_mosquito\ncommon_mosquito, Culex_pipiens\nCulex_quinquefasciatus, Culex_fatigans\ngnat\npunkie, punky, punkey, no-see-um, biting_midge\nmidge\nfungus_gnat\npsychodid\nsand_fly, sandfly, Phlebotomus_papatasii\nfungus_gnat, sciara, sciarid\narmyworm\ncrane_fly, daddy_longlegs\nblackfly, black_fly, buffalo_gnat\nhymenopterous_insect, hymenopteran, hymenopteron, hymenopter\nbee\ndrone\nqueen_bee\nworker\nsoldier\nworker_bee\nhoneybee, Apis_mellifera\nAfricanized_bee, Africanized_honey_bee, killer_bee, Apis_mellifera_scutellata, Apis_mellifera_adansonii\nblack_bee, German_bee\nCarniolan_bee\nItalian_bee\ncarpenter_bee\nbumblebee, humblebee\ncuckoo-bumblebee\nandrena, andrenid, mining_bee\nNomia_melanderi, alkali_bee\nleaf-cutting_bee, leaf-cutter, leaf-cutter_bee\nmason_bee\npotter_bee\nwasp\nvespid, vespid_wasp\npaper_wasp\nhornet\ngiant_hornet, Vespa_crabro\ncommon_wasp, Vespula_vulgaris\nbald-faced_hornet, white-faced_hornet, Vespula_maculata\nyellow_jacket, yellow_hornet, Vespula_maculifrons\nPolistes_annularis\nmason_wasp\npotter_wasp\nMutillidae, family_Mutillidae\nvelvet_ant\nsphecoid_wasp, sphecoid\nmason_wasp\ndigger_wasp\ncicada_killer, 
Sphecius_speciosis\nmud_dauber\ngall_wasp, gallfly, cynipid_wasp, cynipid_gall_wasp\nchalcid_fly, chalcidfly, chalcid, chalcid_wasp\nstrawworm, jointworm\nchalcis_fly\nichneumon_fly\nsawfly\nbirch_leaf_miner, Fenusa_pusilla\nant, emmet, pismire\npharaoh_ant, pharaoh's_ant, Monomorium_pharaonis\nlittle_black_ant, Monomorium_minimum\narmy_ant, driver_ant, legionary_ant\ncarpenter_ant\nfire_ant\nwood_ant, Formica_rufa\nslave_ant\nFormica_fusca\nslave-making_ant, slave-maker\nsanguinary_ant, Formica_sanguinea\nbulldog_ant\nAmazon_ant, Polyergus_rufescens\ntermite, white_ant\ndry-wood_termite\nReticulitermes_lucifugus\nMastotermes_darwiniensis\nMastotermes_electrodominicus\npowder-post_termite, Cryptotermes_brevis\northopterous_insect, orthopteron, orthopteran\ngrasshopper, hopper\nshort-horned_grasshopper, acridid\nlocust\nmigratory_locust, Locusta_migratoria\nmigratory_grasshopper\nlong-horned_grasshopper, tettigoniid\nkatydid\nmormon_cricket, Anabrus_simplex\nsand_cricket, Jerusalem_cricket, Stenopelmatus_fuscus\ncricket\nmole_cricket\nEuropean_house_cricket, Acheta_domestica\nfield_cricket, Acheta_assimilis\ntree_cricket\nsnowy_tree_cricket, Oecanthus_fultoni\nphasmid, phasmid_insect\nwalking_stick, walkingstick, stick_insect\ndiapheromera, Diapheromera_femorata\nwalking_leaf, leaf_insect\ncockroach, roach\noriental_cockroach, oriental_roach, Asiatic_cockroach, blackbeetle, Blatta_orientalis\nAmerican_cockroach, Periplaneta_americana\nAustralian_cockroach, Periplaneta_australasiae\nGerman_cockroach, Croton_bug, crotonbug, water_bug, Blattella_germanica\ngiant_cockroach\nmantis, mantid\npraying_mantis, praying_mantid, Mantis_religioso\nbug\nhemipterous_insect, bug, hemipteran, hemipteron\nleaf_bug, plant_bug\nmirid_bug, mirid, capsid\nfour-lined_plant_bug, four-lined_leaf_bug, Poecilocapsus_lineatus\nlygus_bug\ntarnished_plant_bug, Lygus_lineolaris\nlace_bug\nlygaeid, lygaeid_bug\nchinch_bug, Blissus_leucopterus\ncoreid_bug, coreid\nsquash_bug, 
Anasa_tristis\nleaf-footed_bug, leaf-foot_bug\nbedbug, bed_bug, chinch, Cimex_lectularius\nbackswimmer, Notonecta_undulata\ntrue_bug\nheteropterous_insect\nwater_bug\ngiant_water_bug\nwater_scorpion\nwater_boatman, boat_bug\nwater_strider, pond-skater, water_skater\ncommon_pond-skater, Gerris_lacustris\nassassin_bug, reduviid\nconenose, cone-nosed_bug, conenose_bug, big_bedbug, kissing_bug\nwheel_bug, Arilus_cristatus\nfirebug\ncotton_stainer\nhomopterous_insect, homopteran\nwhitefly\ncitrus_whitefly, Dialeurodes_citri\ngreenhouse_whitefly, Trialeurodes_vaporariorum\nsweet-potato_whitefly\nsuperbug, Bemisia_tabaci, poinsettia_strain\ncotton_strain\ncoccid_insect\nscale_insect\nsoft_scale\nbrown_soft_scale, Coccus_hesperidum\narmored_scale\nSan_Jose_scale, Aspidiotus_perniciosus\ncochineal_insect, cochineal, Dactylopius_coccus\nmealybug, mealy_bug\ncitrophilous_mealybug, citrophilus_mealybug, Pseudococcus_fragilis\nComstock_mealybug, Comstock's_mealybug, Pseudococcus_comstocki\ncitrus_mealybug, Planococcus_citri\nplant_louse, louse\naphid\napple_aphid, green_apple_aphid, Aphis_pomi\nblackfly, bean_aphid, Aphis_fabae\ngreenfly\ngreen_peach_aphid\nant_cow\nwoolly_aphid, woolly_plant_louse\nwoolly_apple_aphid, American_blight, Eriosoma_lanigerum\nwoolly_alder_aphid, Prociphilus_tessellatus\nadelgid\nbalsam_woolly_aphid, Adelges_piceae\nspruce_gall_aphid, Adelges_abietis\nwoolly_adelgid\njumping_plant_louse, psylla, psyllid\ncicada, cicala\ndog-day_cicada, harvest_fly\nseventeen-year_locust, periodical_cicada, Magicicada_septendecim\nspittle_insect, spittlebug\nfroghopper\nmeadow_spittlebug, Philaenus_spumarius\npine_spittlebug\nSaratoga_spittlebug, Aphrophora_saratogensis\nleafhopper\nplant_hopper, planthopper\ntreehopper\nlantern_fly, lantern-fly\npsocopterous_insect\npsocid\nbark-louse, bark_louse\nbooklouse, book_louse, deathwatch, Liposcelis_divinatorius\ncommon_booklouse, Trogium_pulsatorium\nephemerid, ephemeropteran\nmayfly, dayfly, shadfly\nstonefly, stone_fly, 
plecopteran\nneuropteron, neuropteran, neuropterous_insect\nant_lion, antlion, antlion_fly\ndoodlebug, ant_lion, antlion\nlacewing, lacewing_fly\naphid_lion, aphis_lion\ngreen_lacewing, chrysopid, stink_fly\nbrown_lacewing, hemerobiid, hemerobiid_fly\ndobson, dobsonfly, dobson_fly, Corydalus_cornutus\nhellgrammiate, dobson\nfish_fly, fish-fly\nalderfly, alder_fly, Sialis_lutaria\nsnakefly\nmantispid\nodonate\ndragonfly, darning_needle, devil's_darning_needle, sewing_needle, snake_feeder, snake_doctor, mosquito_hawk, skeeter_hawk\ndamselfly\ntrichopterous_insect, trichopteran, trichopteron\ncaddis_fly, caddis-fly, caddice_fly, caddice-fly\ncaseworm\ncaddisworm, strawworm\nthysanuran_insect, thysanuron\nbristletail\nsilverfish, Lepisma_saccharina\nfirebrat, Thermobia_domestica\njumping_bristletail, machilid\nthysanopter, thysanopteron, thysanopterous_insect\nthrips, thrip, thripid\ntobacco_thrips, Frankliniella_fusca\nonion_thrips, onion_louse, Thrips_tobaci\nearwig\ncommon_European_earwig, Forficula_auricularia\nlepidopterous_insect, lepidopteron, lepidopteran\nbutterfly\nnymphalid, nymphalid_butterfly, brush-footed_butterfly, four-footed_butterfly\nmourning_cloak, mourning_cloak_butterfly, Camberwell_beauty, Nymphalis_antiopa\ntortoiseshell, tortoiseshell_butterfly\npainted_beauty, Vanessa_virginiensis\nadmiral\nred_admiral, Vanessa_atalanta\nwhite_admiral, Limenitis_camilla\nbanded_purple, white_admiral, Limenitis_arthemis\nred-spotted_purple, Limenitis_astyanax\nviceroy, Limenitis_archippus\nanglewing\nringlet, ringlet_butterfly\ncomma, comma_butterfly, Polygonia_comma\nfritillary\nsilverspot\nemperor_butterfly, emperor\npurple_emperor, Apatura_iris\npeacock, peacock_butterfly, Inachis_io\ndanaid, danaid_butterfly\nmonarch, monarch_butterfly, milkweed_butterfly, Danaus_plexippus\npierid, pierid_butterfly\ncabbage_butterfly\nsmall_white, Pieris_rapae\nlarge_white, Pieris_brassicae\nsouthern_cabbage_butterfly, Pieris_protodice\nsulphur_butterfly, 
sulfur_butterfly\nlycaenid, lycaenid_butterfly\nblue\ncopper\nAmerican_copper, Lycaena_hypophlaeas\nhairstreak, hairstreak_butterfly\nStrymon_melinus\nmoth\nmoth_miller, miller\ntortricid, tortricid_moth\nleaf_roller, leaf-roller\ntea_tortrix, tortrix, Homona_coffearia\norange_tortrix, tortrix, Argyrotaenia_citrana\ncodling_moth, codlin_moth, Carpocapsa_pomonella\nlymantriid, tussock_moth\ntussock_caterpillar\ngypsy_moth, gipsy_moth, Lymantria_dispar\nbrowntail, brown-tail_moth, Euproctis_phaeorrhoea\ngold-tail_moth, Euproctis_chrysorrhoea\ngeometrid, geometrid_moth\nPaleacrita_vernata\nAlsophila_pometaria\ncankerworm\nspring_cankerworm\nfall_cankerworm\nmeasuring_worm, inchworm, looper\npyralid, pyralid_moth\nbee_moth, wax_moth, Galleria_mellonella\ncorn_borer, European_corn_borer_moth, corn_borer_moth, Pyrausta_nubilalis\nMediterranean_flour_moth, Anagasta_kuehniella\ntobacco_moth, cacao_moth, Ephestia_elutella\nalmond_moth, fig_moth, Cadra_cautella\nraisin_moth, Cadra_figulilella\ntineoid, tineoid_moth\ntineid, tineid_moth\nclothes_moth\ncasemaking_clothes_moth, Tinea_pellionella\nwebbing_clothes_moth, webbing_moth, Tineola_bisselliella\ncarpet_moth, tapestry_moth, Trichophaga_tapetzella\ngelechiid, gelechiid_moth\ngrain_moth\nangoumois_moth, angoumois_grain_moth, Sitotroga_cerealella\npotato_moth, potato_tuber_moth, splitworm, Phthorimaea_operculella\npotato_tuberworm, Phthorimaea_operculella\nnoctuid_moth, noctuid, owlet_moth\ncutworm\nunderwing\nred_underwing, Catocala_nupta\nantler_moth, Cerapteryx_graminis\nheliothis_moth, Heliothis_zia\narmy_cutworm, Chorizagrotis_auxiliaris\narmyworm, Pseudaletia_unipuncta\narmyworm, army_worm, Pseudaletia_unipuncta\nSpodoptera_exigua\nbeet_armyworm, Spodoptera_exigua\nSpodoptera_frugiperda\nfall_armyworm, Spodoptera_frugiperda\nhawkmoth, hawk_moth, sphingid, sphinx_moth, hummingbird_moth\nManduca_sexta\ntobacco_hornworm, tomato_worm, Manduca_sexta\nManduca_quinquemaculata\ntomato_hornworm, potato_worm, 
Manduca_quinquemaculata\ndeath's-head_moth, Acherontia_atropos\nbombycid, bombycid_moth, silkworm_moth\ndomestic_silkworm_moth, domesticated_silkworm_moth, Bombyx_mori\nsilkworm\nsaturniid, saturniid_moth\nemperor, emperor_moth, Saturnia_pavonia\nimperial_moth, Eacles_imperialis\ngiant_silkworm_moth, silkworm_moth\nsilkworm, giant_silkworm, wild_wilkworm\nluna_moth, Actias_luna\ncecropia, cecropia_moth, Hyalophora_cecropia\ncynthia_moth, Samia_cynthia, Samia_walkeri\nailanthus_silkworm, Samia_cynthia\nio_moth, Automeris_io\npolyphemus_moth, Antheraea_polyphemus\npernyi_moth, Antheraea_pernyi\ntussah, tusseh, tussur, tussore, tusser, Antheraea_mylitta\natlas_moth, Atticus_atlas\narctiid, arctiid_moth\ntiger_moth\ncinnabar, cinnabar_moth, Callimorpha_jacobeae\nlasiocampid, lasiocampid_moth\neggar, egger\ntent-caterpillar_moth, Malacosoma_americana\ntent_caterpillar\ntent-caterpillar_moth, Malacosoma_disstria\nforest_tent_caterpillar, Malacosoma_disstria\nlappet, lappet_moth\nlappet_caterpillar\nwebworm\nwebworm_moth\nHyphantria_cunea\nfall_webworm, Hyphantria_cunea\ngarden_webworm, Loxostege_similalis\ninstar\ncaterpillar\ncorn_borer, Pyrausta_nubilalis\nbollworm\npink_bollworm, Gelechia_gossypiella\ncorn_earworm, cotton_bollworm, tomato_fruitworm, tobacco_budworm, vetchworm, Heliothis_zia\ncabbageworm, Pieris_rapae\nwoolly_bear, woolly_bear_caterpillar\nwoolly_bear_moth\nlarva\nnymph\nleptocephalus\ngrub\nmaggot\nleatherjacket\npupa\nchrysalis\nimago\nqueen\nphoronid\nbryozoan, polyzoan, sea_mat, sea_moss, moss_animal\nbrachiopod, lamp_shell, lampshell\npeanut_worm, sipunculid\nechinoderm\nstarfish, sea_star\nbrittle_star, brittle-star, serpent_star\nbasket_star, basket_fish\nAstrophyton_muricatum\nsea_urchin\nedible_sea_urchin, Echinus_esculentus\nsand_dollar\nheart_urchin\ncrinoid\nsea_lily\nfeather_star, comatulid\nsea_cucumber, holothurian\ntrepang, Holothuria_edulis\nDuplicidentata\nlagomorph, gnawing_mammal\nleporid, leporid_mammal\nrabbit, coney, 
cony\nrabbit_ears\nlapin\nbunny, bunny_rabbit\nEuropean_rabbit, Old_World_rabbit, Oryctolagus_cuniculus\nwood_rabbit, cottontail, cottontail_rabbit\neastern_cottontail, Sylvilagus_floridanus\nswamp_rabbit, canecutter, swamp_hare, Sylvilagus_aquaticus\nmarsh_hare, swamp_rabbit, Sylvilagus_palustris\nhare\nleveret\nEuropean_hare, Lepus_europaeus\njackrabbit\nwhite-tailed_jackrabbit, whitetail_jackrabbit, Lepus_townsendi\nblacktail_jackrabbit, Lepus_californicus\npolar_hare, Arctic_hare, Lepus_arcticus\nsnowshoe_hare, snowshoe_rabbit, varying_hare, Lepus_americanus\nBelgian_hare, leporide\nAngora, Angora_rabbit\npika, mouse_hare, rock_rabbit, coney, cony\nlittle_chief_hare, Ochotona_princeps\ncollared_pika, Ochotona_collaris\nrodent, gnawer\nmouse\nrat\npocket_rat\nmurine\nhouse_mouse, Mus_musculus\nharvest_mouse, Micromyx_minutus\nfield_mouse, fieldmouse\nnude_mouse\nEuropean_wood_mouse, Apodemus_sylvaticus\nbrown_rat, Norway_rat, Rattus_norvegicus\nwharf_rat\nsewer_rat\nblack_rat, roof_rat, Rattus_rattus\nbandicoot_rat, mole_rat\njerboa_rat\nkangaroo_mouse\nwater_rat\nbeaver_rat\nNew_World_mouse\nAmerican_harvest_mouse, harvest_mouse\nwood_mouse\nwhite-footed_mouse, vesper_mouse, Peromyscus_leucopus\ndeer_mouse, Peromyscus_maniculatus\ncactus_mouse, Peromyscus_eremicus\ncotton_mouse, Peromyscus_gossypinus\npygmy_mouse, Baiomys_taylori\ngrasshopper_mouse\nmuskrat, musquash, Ondatra_zibethica\nround-tailed_muskrat, Florida_water_rat, Neofiber_alleni\ncotton_rat, Sigmodon_hispidus\nwood_rat, wood-rat\ndusky-footed_wood_rat\nvole, field_mouse\npackrat, pack_rat, trade_rat, bushytail_woodrat, Neotoma_cinerea\ndusky-footed_woodrat, Neotoma_fuscipes\neastern_woodrat, Neotoma_floridana\nrice_rat, Oryzomys_palustris\npine_vole, pine_mouse, Pitymys_pinetorum\nmeadow_vole, meadow_mouse, Microtus_pennsylvaticus\nwater_vole, Richardson_vole, Microtus_richardsoni\nprairie_vole, Microtus_ochrogaster\nwater_vole, water_rat, Arvicola_amphibius\nred-backed_mouse, 
redback_vole\nphenacomys\nhamster\nEurasian_hamster, Cricetus_cricetus\ngolden_hamster, Syrian_hamster, Mesocricetus_auratus\ngerbil, gerbille\njird\ntamarisk_gerbil, Meriones_unguiculatus\nsand_rat, Meriones_longifrons\nlemming\nEuropean_lemming, Lemmus_lemmus\nbrown_lemming, Lemmus_trimucronatus\ngrey_lemming, gray_lemming, red-backed_lemming\npied_lemming\nHudson_bay_collared_lemming, Dicrostonyx_hudsonius\nsouthern_bog_lemming, Synaptomys_cooperi\nnorthern_bog_lemming, Synaptomys_borealis\nporcupine, hedgehog\nOld_World_porcupine\nbrush-tailed_porcupine, brush-tail_porcupine\nlong-tailed_porcupine, Trichys_lipura\nNew_World_porcupine\nCanada_porcupine, Erethizon_dorsatum\npocket_mouse\nsilky_pocket_mouse, Perognathus_flavus\nplains_pocket_mouse, Perognathus_flavescens\nhispid_pocket_mouse, Perognathus_hispidus\nMexican_pocket_mouse, Liomys_irroratus\nkangaroo_rat, desert_rat, Dipodomys_phillipsii\nOrd_kangaroo_rat, Dipodomys_ordi\nkangaroo_mouse, dwarf_pocket_rat\njumping_mouse\nmeadow_jumping_mouse, Zapus_hudsonius\njerboa\ntypical_jerboa\nJaculus_jaculus\ndormouse\nloir, Glis_glis\nhazel_mouse, Muscardinus_avellanarius\nlerot\ngopher, pocket_gopher, pouched_rat\nplains_pocket_gopher, Geomys_bursarius\nsoutheastern_pocket_gopher, Geomys_pinetis\nvalley_pocket_gopher, Thomomys_bottae\nnorthern_pocket_gopher, Thomomys_talpoides\nsquirrel\ntree_squirrel\neastern_grey_squirrel, eastern_gray_squirrel, cat_squirrel, Sciurus_carolinensis\nwestern_grey_squirrel, western_gray_squirrel, Sciurus_griseus\nfox_squirrel, eastern_fox_squirrel, Sciurus_niger\nblack_squirrel\nred_squirrel, cat_squirrel, Sciurus_vulgaris\nAmerican_red_squirrel, spruce_squirrel, red_squirrel, Sciurus_hudsonicus, Tamiasciurus_hudsonicus\nchickeree, Douglas_squirrel, Tamiasciurus_douglasi\nantelope_squirrel, whitetail_antelope_squirrel, antelope_chipmunk, Citellus_leucurus\nground_squirrel, gopher, spermophile\nmantled_ground_squirrel, Citellus_lateralis\nsuslik, souslik, 
Citellus_citellus\nflickertail, Richardson_ground_squirrel, Citellus_richardsoni\nrock_squirrel, Citellus_variegatus\nArctic_ground_squirrel, parka_squirrel, Citellus_parryi\nprairie_dog, prairie_marmot\nblacktail_prairie_dog, Cynomys_ludovicianus\nwhitetail_prairie_dog, Cynomys_gunnisoni\neastern_chipmunk, hackee, striped_squirrel, ground_squirrel, Tamias_striatus\nchipmunk\nbaronduki, baranduki, barunduki, burunduki, Eutamius_asiaticus, Eutamius_sibiricus\nAmerican_flying_squirrel\nsouthern_flying_squirrel, Glaucomys_volans\nnorthern_flying_squirrel, Glaucomys_sabrinus\nmarmot\ngroundhog, woodchuck, Marmota_monax\nhoary_marmot, whistler, whistling_marmot, Marmota_caligata\nyellowbelly_marmot, rockchuck, Marmota_flaviventris\nAsiatic_flying_squirrel\nbeaver\nOld_World_beaver, Castor_fiber\nNew_World_beaver, Castor_canadensis\nmountain_beaver, sewellel, Aplodontia_rufa\ncavy\nguinea_pig, Cavia_cobaya\naperea, wild_cavy, Cavia_porcellus\nmara, Dolichotis_patagonum\ncapybara, capibara, Hydrochoerus_hydrochaeris\nagouti, Dasyprocta_aguti\npaca, Cuniculus_paca\nmountain_paca\ncoypu, nutria, Myocastor_coypus\nchinchilla, Chinchilla_laniger\nmountain_chinchilla, mountain_viscacha\nviscacha, chinchillon, Lagostomus_maximus\nabrocome, chinchilla_rat, rat_chinchilla\nmole_rat\nmole_rat\nsand_rat\nnaked_mole_rat\nqueen, queen_mole_rat\nDamaraland_mole_rat\nUngulata\nungulate, hoofed_mammal\nunguiculate, unguiculate_mammal\ndinoceras, uintathere\nhyrax, coney, cony, dassie, das\nrock_hyrax, rock_rabbit, Procavia_capensis\nodd-toed_ungulate, perissodactyl, perissodactyl_mammal\nequine, equid\nhorse, Equus_caballus\nroan\nstablemate, stable_companion\ngee-gee\neohippus, dawn_horse\nfoal\nfilly\ncolt\nmale_horse\nridgeling, ridgling, ridgel, ridgil\nstallion, entire\nstud, studhorse\ngelding\nmare, female_horse\nbroodmare, stud_mare\nsaddle_horse, riding_horse, mount\nremount\npalfrey\nwarhorse\ncavalry_horse\ncharger, 
courser\nsteed\nprancer\nhack\ncow_pony\nquarter_horse\nMorgan\nTennessee_walker, Tennessee_walking_horse, Walking_horse, Plantation_walking_horse\nAmerican_saddle_horse\nAppaloosa\nArabian, Arab\nLippizan, Lipizzan, Lippizaner\npony\npolo_pony\nmustang\nbronco, bronc, broncho\nbucking_bronco\nbuckskin\ncrowbait, crow-bait\ndun\ngrey, gray\nwild_horse\ntarpan, Equus_caballus_gomelini\nPrzewalski's_horse, Przevalski's_horse, Equus_caballus_przewalskii, Equus_caballus_przevalskii\ncayuse, Indian_pony\nhack\nhack, jade, nag, plug\nplow_horse, plough_horse\npony\nShetland_pony\nWelsh_pony\nExmoor\nracehorse, race_horse, bangtail\nthoroughbred\nsteeplechaser\nracer\nfinisher\npony\nyearling\ndark_horse\nmudder\nnonstarter\nstalking-horse\nharness_horse\ncob\nhackney\nworkhorse\ndraft_horse, draught_horse, dray_horse\npackhorse\ncarthorse, cart_horse, drayhorse\nClydesdale\nPercheron\nfarm_horse, dobbin\nshire, shire_horse\npole_horse, poler\npost_horse, post-horse, poster\ncoach_horse\npacer\npacer, pacemaker, pacesetter\ntrotting_horse, trotter\npole_horse\nstepper, high_stepper\nchestnut\nliver_chestnut\nbay\nsorrel\npalomino\npinto\nass\ndomestic_ass, donkey, Equus_asinus\nburro\nmoke\njack, jackass\njennet, jenny, jenny_ass\nmule\nhinny\nwild_ass\nAfrican_wild_ass, Equus_asinus\nkiang, Equus_kiang\nonager, Equus_hemionus\nchigetai, dziggetai, Equus_hemionus_hemionus\nzebra\ncommon_zebra, Burchell's_zebra, Equus_Burchelli\nmountain_zebra, Equus_zebra_zebra\ngrevy's_zebra, Equus_grevyi\nquagga, Equus_quagga\nrhinoceros, rhino\nIndian_rhinoceros, Rhinoceros_unicornis\nwoolly_rhinoceros, Rhinoceros_antiquitatis\nwhite_rhinoceros, Ceratotherium_simum, Diceros_simus\nblack_rhinoceros, Diceros_bicornis\ntapir\nNew_World_tapir, Tapirus_terrestris\nMalayan_tapir, Indian_tapir, Tapirus_indicus\neven-toed_ungulate, artiodactyl, artiodactyl_mammal\nswine\nhog, pig, grunter, squealer, Sus_scrofa\npiglet, piggy, shoat, shote\nsucking_pig\nporker\nboar\nsow\nrazorback, 
razorback_hog, razorbacked_hog\nwild_boar, boar, Sus_scrofa\nbabirusa, babiroussa, babirussa, Babyrousa_Babyrussa\nwarthog\npeccary, musk_hog\ncollared_peccary, javelina, Tayassu_angulatus, Tayassu_tajacu, Peccari_angulatus\nwhite-lipped_peccary, Tayassu_pecari\nhippopotamus, hippo, river_horse, Hippopotamus_amphibius\nruminant\nbovid\nbovine\nox, wild_ox\ncattle, cows, kine, oxen, Bos_taurus\nox\nstirk\nbullock, steer\nbull\ncow, moo-cow\nheifer\nbullock\ndogie, dogy, leppy\nmaverick\nbeef, beef_cattle\nlonghorn, Texas_longhorn\nBrahman, Brahma, Brahmin, Bos_indicus\nzebu\naurochs, urus, Bos_primigenius\nyak, Bos_grunniens\nbanteng, banting, tsine, Bos_banteng\nWelsh, Welsh_Black\nred_poll\nSanta_Gertrudis\nAberdeen_Angus, Angus, black_Angus\nAfricander\ndairy_cattle, dairy_cow, milch_cow, milk_cow, milcher, milker\nAyrshire\nBrown_Swiss\nCharolais\nJersey\nDevon\ngrade\nDurham, shorthorn\nmilking_shorthorn\nGalloway\nFriesian, Holstein, Holstein-Friesian\nGuernsey\nHereford, whiteface\ncattalo, beefalo\nOld_World_buffalo, buffalo\nwater_buffalo, water_ox, Asiatic_buffalo, Bubalus_bubalis\nIndian_buffalo\ncarabao\nanoa, dwarf_buffalo, Anoa_depressicornis\ntamarau, tamarao, Bubalus_mindorensis, Anoa_mindorensis\nCape_buffalo, Synercus_caffer\nAsian_wild_ox\ngaur, Bibos_gaurus\ngayal, mithan, Bibos_frontalis\nbison\nAmerican_bison, American_buffalo, buffalo, Bison_bison\nwisent, aurochs, Bison_bonasus\nmusk_ox, musk_sheep, Ovibos_moschatus\nsheep\newe\nram, tup\nwether\nlamb\nlambkin\nbaa-lamb\nhog, hogget, hogg\nteg\nPersian_lamb\nblack_sheep\ndomestic_sheep, Ovis_aries\nCotswold\nHampshire, Hampshire_down\nLincoln\nExmoor\nCheviot\nbroadtail, caracul, karakul\nlongwool\nmerino, merino_sheep\nRambouillet\nwild_sheep\nargali, argal, Ovis_ammon\nMarco_Polo_sheep, Marco_Polo's_sheep, Ovis_poli\nurial, Ovis_vignei\nDall_sheep, Dall's_sheep, white_sheep, Ovis_montana_dalli\nmountain_sheep\nbighorn, bighorn_sheep, cimarron, Rocky_Mountain_bighorn, Rocky_Mountain_sheep, 
Ovis_canadensis\nmouflon, moufflon, Ovis_musimon\naoudad, arui, audad, Barbary_sheep, maned_sheep, Ammotragus_lervia\ngoat, caprine_animal\nkid\nbilly, billy_goat, he-goat\nnanny, nanny-goat, she-goat\ndomestic_goat, Capra_hircus\nCashmere_goat, Kashmir_goat\nAngora, Angora_goat\nwild_goat\nbezoar_goat, pasang, Capra_aegagrus\nmarkhor, markhoor, Capra_falconeri\nibex, Capra_ibex\ngoat_antelope\nmountain_goat, Rocky_Mountain_goat, Oreamnos_americanus\ngoral, Naemorhedus_goral\nserow\nchamois, Rupicapra_rupicapra\ntakin, gnu_goat, Budorcas_taxicolor\nantelope\nblackbuck, black_buck, Antilope_cervicapra\ngerenuk, Litocranius_walleri\naddax, Addax_nasomaculatus\ngnu, wildebeest\ndik-dik\nhartebeest\nsassaby, topi, Damaliscus_lunatus\nimpala, Aepyceros_melampus\ngazelle\nThomson's_gazelle, Gazella_thomsoni\nGazella_subgutturosa\nspringbok, springbuck, Antidorcas_marsupialis, Antidorcas_euchore\nbongo, Tragelaphus_eurycerus, Boocercus_eurycerus\nkudu, koodoo, koudou\ngreater_kudu, Tragelaphus_strepsiceros\nlesser_kudu, Tragelaphus_imberbis\nharnessed_antelope\nnyala, Tragelaphus_angasi\nmountain_nyala, Tragelaphus_buxtoni\nbushbuck, guib, Tragelaphus_scriptus\nnilgai, nylghai, nylghau, blue_bull, Boselaphus_tragocamelus\nsable_antelope, Hippotragus_niger\nsaiga, Saiga_tatarica\nsteenbok, steinbok, Raphicerus_campestris\neland\ncommon_eland, Taurotragus_oryx\ngiant_eland, Taurotragus_derbianus\nkob, Kobus_kob\nlechwe, Kobus_leche\nwaterbuck\npuku, Adenota_vardoni\noryx, pasang\ngemsbok, gemsbuck, Oryx_gazella\nforest_goat, spindle_horn, Pseudoryx_nghetinhensis\npronghorn, prongbuck, pronghorn_antelope, American_antelope, Antilocapra_americana\ndeer, cervid\nstag\nroyal, royal_stag\npricket\nfawn\nred_deer, elk, American_elk, wapiti, Cervus_elaphus\nhart, stag\nhind\nbrocket\nsambar, sambur, Cervus_unicolor\nwapiti, elk, American_elk, Cervus_elaphus_canadensis\nJapanese_deer, sika, Cervus_nipon, Cervus_sika\nVirginia_deer, white_tail, whitetail, white-tailed_deer, 
whitetail_deer, Odocoileus_Virginianus\nmule_deer, burro_deer, Odocoileus_hemionus\nblack-tailed_deer, blacktail_deer, blacktail, Odocoileus_hemionus_columbianus\nelk, European_elk, moose, Alces_alces\nfallow_deer, Dama_dama\nroe_deer, Capreolus_capreolus\nroebuck\ncaribou, reindeer, Greenland_caribou, Rangifer_tarandus\nwoodland_caribou, Rangifer_caribou\nbarren_ground_caribou, Rangifer_arcticus\nbrocket\nmuntjac, barking_deer\nmusk_deer, Moschus_moschiferus\npere_david's_deer, elaphure, Elaphurus_davidianus\nchevrotain, mouse_deer\nkanchil, Tragulus_kanchil\nnapu, Tragulus_Javanicus\nwater_chevrotain, water_deer, Hyemoschus_aquaticus\ncamel\nArabian_camel, dromedary, Camelus_dromedarius\nBactrian_camel, Camelus_bactrianus\nllama\ndomestic_llama, Lama_peruana\nguanaco, Lama_guanicoe\nalpaca, Lama_pacos\nvicuna, Vicugna_vicugna\ngiraffe, camelopard, Giraffa_camelopardalis\nokapi, Okapia_johnstoni\nmusteline_mammal, mustelid, musteline\nweasel\nermine, shorttail_weasel, Mustela_erminea\nstoat\nNew_World_least_weasel, Mustela_rixosa\nOld_World_least_weasel, Mustela_nivalis\nlongtail_weasel, long-tailed_weasel, Mustela_frenata\nmink\nAmerican_mink, Mustela_vison\npolecat, fitch, foulmart, foumart, Mustela_putorius\nferret\nblack-footed_ferret, ferret, Mustela_nigripes\nmuishond\nsnake_muishond, Poecilogale_albinucha\nstriped_muishond, Ictonyx_striata\notter\nriver_otter, Lutra_canadensis\nEurasian_otter, Lutra_lutra\nsea_otter, Enhydra_lutris\nskunk, polecat, wood_pussy\nstriped_skunk, Mephitis_mephitis\nhooded_skunk, Mephitis_macroura\nhog-nosed_skunk, hognosed_skunk, badger_skunk, rooter_skunk, Conepatus_leuconotus\nspotted_skunk, little_spotted_skunk, Spilogale_putorius\nbadger\nAmerican_badger, Taxidea_taxus\nEurasian_badger, Meles_meles\nratel, honey_badger, Mellivora_capensis\nferret_badger\nhog_badger, hog-nosed_badger, sand_badger, Arctonyx_collaris\nwolverine, carcajou, skunk_bear, Gulo_luscus\nglutton, Gulo_gulo, wolverine\ngrison, Grison_vittatus, 
Galictis_vittatus\nmarten, marten_cat\npine_marten, Martes_martes\nsable, Martes_zibellina\nAmerican_marten, American_sable, Martes_americana\nstone_marten, beech_marten, Martes_foina\nfisher, pekan, fisher_cat, black_cat, Martes_pennanti\nyellow-throated_marten, Charronia_flavigula\ntayra, taira, Eira_barbara\nfictional_animal\npachyderm\nedentate\narmadillo\npeba, nine-banded_armadillo, Texas_armadillo, Dasypus_novemcinctus\napar, three-banded_armadillo, Tolypeutes_tricinctus\ntatouay, cabassous, Cabassous_unicinctus\npeludo, poyou, Euphractus_sexcinctus\ngiant_armadillo, tatou, tatu, Priodontes_giganteus\npichiciago, pichiciego, fairy_armadillo, chlamyphore, Chlamyphorus_truncatus\nsloth, tree_sloth\nthree-toed_sloth, ai, Bradypus_tridactylus\ntwo-toed_sloth, unau, unai, Choloepus_didactylus\ntwo-toed_sloth, unau, unai, Choloepus_hoffmanni\nmegatherian, megatheriid, megatherian_mammal\nmylodontid\nanteater, New_World_anteater\nant_bear, giant_anteater, great_anteater, tamanoir, Myrmecophaga_jubata\nsilky_anteater, two-toed_anteater, Cyclopes_didactylus\ntamandua, tamandu, lesser_anteater, Tamandua_tetradactyla\npangolin, scaly_anteater, anteater\ncoronet\nscapular\ntadpole, polliwog, pollywog\nprimate\nsimian\nape\nanthropoid\nanthropoid_ape\nhominoid\nhominid\nhomo, man, human_being, human\nworld, human_race, humanity, humankind, human_beings, humans, mankind, man\nHomo_erectus\nPithecanthropus, Pithecanthropus_erectus, genus_Pithecanthropus\nJava_man, Trinil_man\nPeking_man\nSinanthropus, genus_Sinanthropus\nHomo_soloensis\nJavanthropus, genus_Javanthropus\nHomo_habilis\nHomo_sapiens\nNeandertal_man, Neanderthal_man, Neandertal, Neanderthal, Homo_sapiens_neanderthalensis\nCro-magnon\nHomo_sapiens_sapiens, modern_man\naustralopithecine\nAustralopithecus_afarensis\nAustralopithecus_africanus\nAustralopithecus_boisei\nZinjanthropus, genus_Zinjanthropus\nAustralopithecus_robustus\nParanthropus, genus_Paranthropus\nSivapithecus\nrudapithecus, 
Dryopithecus_Rudapithecus_hungaricus\nproconsul\nAegyptopithecus\ngreat_ape, pongid\norangutan, orang, orangutang, Pongo_pygmaeus\ngorilla, Gorilla_gorilla\nwestern_lowland_gorilla, Gorilla_gorilla_gorilla\neastern_lowland_gorilla, Gorilla_gorilla_grauri\nmountain_gorilla, Gorilla_gorilla_beringei\nsilverback\nchimpanzee, chimp, Pan_troglodytes\nwestern_chimpanzee, Pan_troglodytes_verus\neastern_chimpanzee, Pan_troglodytes_schweinfurthii\ncentral_chimpanzee, Pan_troglodytes_troglodytes\npygmy_chimpanzee, bonobo, Pan_paniscus\nlesser_ape\ngibbon, Hylobates_lar\nsiamang, Hylobates_syndactylus, Symphalangus_syndactylus\nmonkey\nOld_World_monkey, catarrhine\nguenon, guenon_monkey\ntalapoin, Cercopithecus_talapoin\ngrivet, Cercopithecus_aethiops\nvervet, vervet_monkey, Cercopithecus_aethiops_pygerythrus\ngreen_monkey, African_green_monkey, Cercopithecus_aethiops_sabaeus\nmangabey\npatas, hussar_monkey, Erythrocebus_patas\nbaboon\nchacma, chacma_baboon, Papio_ursinus\nmandrill, Mandrillus_sphinx\ndrill, Mandrillus_leucophaeus\nmacaque\nrhesus, rhesus_monkey, Macaca_mulatta\nbonnet_macaque, bonnet_monkey, capped_macaque, crown_monkey, Macaca_radiata\nBarbary_ape, Macaca_sylvana\ncrab-eating_macaque, croo_monkey, Macaca_irus\nlangur\nentellus, hanuman, Presbytes_entellus, Semnopithecus_entellus\ncolobus, colobus_monkey\nguereza, Colobus_guereza\nproboscis_monkey, Nasalis_larvatus\nNew_World_monkey, platyrrhine, platyrrhinian\nmarmoset\ntrue_marmoset\npygmy_marmoset, Cebuella_pygmaea\ntamarin, lion_monkey, lion_marmoset, leoncita\nsilky_tamarin, Leontocebus_rosalia\npinche, Leontocebus_oedipus\ncapuchin, ringtail, Cebus_capucinus\ndouroucouli, Aotus_trivirgatus\nhowler_monkey, howler\nsaki\nuakari\ntiti, titi_monkey\nspider_monkey, Ateles_geoffroyi\nsquirrel_monkey, Saimiri_sciureus\nwoolly_monkey\ntree_shrew\nprosimian\nlemur\nMadagascar_cat, ring-tailed_lemur, Lemur_catta\naye-aye, Daubentonia_madagascariensis\nslender_loris, Loris_gracilis\nslow_loris, 
Nycticebus_tardigradua, Nycticebus_pygmaeus\npotto, kinkajou, Perodicticus_potto\nangwantibo, golden_potto, Arctocebus_calabarensis\ngalago, bushbaby, bush_baby\nindri, indris, Indri_indri, Indri_brevicaudatus\nwoolly_indris, Avahi_laniger\ntarsier\nTarsius_syrichta\nTarsius_glis\nflying_lemur, flying_cat, colugo\nCynocephalus_variegatus\nproboscidean, proboscidian\nelephant\nrogue_elephant\nIndian_elephant, Elephas_maximus\nAfrican_elephant, Loxodonta_africana\nmammoth\nwoolly_mammoth, northern_mammoth, Mammuthus_primigenius\ncolumbian_mammoth, Mammuthus_columbi\nimperial_mammoth, imperial_elephant, Archidiskidon_imperator\nmastodon, mastodont\nplantigrade_mammal, plantigrade\ndigitigrade_mammal, digitigrade\nprocyonid\nraccoon, racoon\ncommon_raccoon, common_racoon, coon, ringtail, Procyon_lotor\ncrab-eating_raccoon, Procyon_cancrivorus\nbassarisk, cacomistle, cacomixle, coon_cat, raccoon_fox, ringtail, ring-tailed_cat, civet_cat, miner's_cat, Bassariscus_astutus\nkinkajou, honey_bear, potto, Potos_flavus, Potos_caudivolvulus\ncoati, coati-mondi, coati-mundi, coon_cat, Nasua_narica\nlesser_panda, red_panda, panda, bear_cat, cat_bear, Ailurus_fulgens\ngiant_panda, panda, panda_bear, coon_bear, Ailuropoda_melanoleuca\ntwitterer\nfish\nfingerling\ngame_fish, sport_fish\nfood_fish\nrough_fish\ngroundfish, bottom_fish\nyoung_fish\nparr\nmouthbreeder\nspawner\nbarracouta, snoek\ncrossopterygian, lobefin, lobe-finned_fish\ncoelacanth, Latimeria_chalumnae\nlungfish\nceratodus\ncatfish, siluriform_fish\nsilurid, silurid_fish\nEuropean_catfish, sheatfish, Silurus_glanis\nelectric_catfish, Malopterurus_electricus\nbullhead, bullhead_catfish\nhorned_pout, hornpout, pout, Ameiurus_Melas\nbrown_bullhead\nchannel_catfish, channel_cat, Ictalurus_punctatus\nblue_catfish, blue_cat, blue_channel_catfish, blue_channel_cat\nflathead_catfish, mudcat, goujon, shovelnose_catfish, spoonbill_catfish, Pylodictus_olivaris\narmored_catfish\nsea_catfish\ngadoid, gadoid_fish\ncod, 
codfish\ncodling\nAtlantic_cod, Gadus_morhua\nPacific_cod, Alaska_cod, Gadus_macrocephalus\nwhiting, Merlangus_merlangus, Gadus_merlangus\nburbot, eelpout, ling, cusk, Lota_lota\nhaddock, Melanogrammus_aeglefinus\npollack, pollock, Pollachius_pollachius\nhake\nsilver_hake, Merluccius_bilinearis, whiting\nling\ncusk, torsk, Brosme_brosme\ngrenadier, rattail, rattail_fish\neel\nelver\ncommon_eel, freshwater_eel\ntuna, Anguilla_sucklandii\nmoray, moray_eel\nconger, conger_eel\nteleost_fish, teleost, teleostan\nbeaked_salmon, sandfish, Gonorhynchus_gonorhynchus\nclupeid_fish, clupeid\nwhitebait\nbrit, britt\nshad\ncommon_American_shad, Alosa_sapidissima\nriver_shad, Alosa_chrysocloris\nallice_shad, allis_shad, allice, allis, Alosa_alosa\nalewife, Alosa_pseudoharengus, Pomolobus_pseudoharengus\nmenhaden, Brevoortia_tyrannis\nherring, Clupea_harangus\nAtlantic_herring, Clupea_harengus_harengus\nPacific_herring, Clupea_harengus_pallasii\nsardine\nsild\nbrisling, sprat, Clupea_sprattus\npilchard, sardine, Sardina_pilchardus\nPacific_sardine, Sardinops_caerulea\nanchovy\nmediterranean_anchovy, Engraulis_encrasicholus\nsalmonid\nsalmon\nparr\nblackfish\nredfish\nAtlantic_salmon, Salmo_salar\nlandlocked_salmon, lake_salmon\nsockeye, sockeye_salmon, red_salmon, blueback_salmon, Oncorhynchus_nerka\nchinook, chinook_salmon, king_salmon, quinnat_salmon, Oncorhynchus_tshawytscha\ncoho, cohoe, coho_salmon, blue_jack, silver_salmon, Oncorhynchus_kisutch\ntrout\nbrown_trout, salmon_trout, Salmo_trutta\nrainbow_trout, Salmo_gairdneri\nsea_trout\nlake_trout, salmon_trout, Salvelinus_namaycush\nbrook_trout, speckled_trout, Salvelinus_fontinalis\nchar, charr\nArctic_char, Salvelinus_alpinus\nwhitefish\nlake_whitefish, Coregonus_clupeaformis\ncisco, lake_herring, Coregonus_artedi\nround_whitefish, Menominee_whitefish, Prosopium_cylindraceum\nsmelt\nsparling, European_smelt, Osmerus_eperlanus\ncapelin, capelan, caplin\ntarpon, Tarpon_atlanticus\nladyfish, tenpounder, 
Elops_saurus\nbonefish, Albula_vulpes\nargentine\nlanternfish\nlizardfish, snakefish, snake-fish\nlancetfish, lancet_fish, wolffish\nopah, moonfish, Lampris_regius\nNew_World_opah, Lampris_guttatus\nribbonfish\ndealfish, Trachipterus_arcticus\noarfish, king_of_the_herring, ribbonfish, Regalecus_glesne\nbatfish\ngoosefish, angler, anglerfish, angler_fish, monkfish, lotte, allmouth, Lophius_Americanus\ntoadfish, Opsanus_tau\noyster_fish, oyster-fish, oysterfish\nfrogfish\nsargassum_fish\nneedlefish, gar, billfish\ntimucu\nflying_fish\nmonoplane_flying_fish, two-wing_flying_fish\nhalfbeak\nsaury, billfish, Scomberesox_saurus\nspiny-finned_fish, acanthopterygian\nlingcod, Ophiodon_elongatus\npercoid_fish, percoid, percoidean\nperch\nclimbing_perch, Anabas_testudineus, A._testudineus\nperch\nyellow_perch, Perca_flavescens\nEuropean_perch, Perca_fluviatilis\npike-perch, pike_perch\nwalleye, walleyed_pike, jack_salmon, dory, Stizostedion_vitreum\nblue_pike, blue_pickerel, blue_pikeperch, blue_walleye, Strizostedion_vitreum_glaucum\nsnail_darter, Percina_tanasi\ncusk-eel\nbrotula\npearlfish, pearl-fish\nrobalo\nsnook\npike\nnorthern_pike, Esox_lucius\nmuskellunge, Esox_masquinongy\npickerel\nchain_pickerel, chain_pike, Esox_niger\nredfin_pickerel, barred_pickerel, Esox_americanus\nsunfish, centrarchid\ncrappie\nblack_crappie, Pomoxis_nigromaculatus\nwhite_crappie, Pomoxis_annularis\nfreshwater_bream, bream\npumpkinseed, Lepomis_gibbosus\nbluegill, Lepomis_macrochirus\nspotted_sunfish, stumpknocker, Lepomis_punctatus\nfreshwater_bass\nrock_bass, rock_sunfish, Ambloplites_rupestris\nblack_bass\nKentucky_black_bass, spotted_black_bass, Micropterus_pseudoplites\nsmallmouth, smallmouth_bass, smallmouthed_bass, smallmouth_black_bass, smallmouthed_black_bass, Micropterus_dolomieu\nlargemouth, largemouth_bass, largemouthed_bass, largemouth_black_bass, largemouthed_black_bass, Micropterus_salmoides\nbass\nserranid_fish, serranid\nwhite_perch, silver_perch, 
Morone_americana\nyellow_bass, Morone_interrupta\nblackmouth_bass, Synagrops_bellus\nrock_sea_bass, rock_bass, Centropristis_philadelphica\nstriped_bass, striper, Roccus_saxatilis, rockfish\nstone_bass, wreckfish, Polyprion_americanus\ngrouper\nhind\nrock_hind, Epinephelus_adscensionis\ncreole-fish, Paranthias_furcifer\njewfish, Mycteroperca_bonaci\nsoapfish\nsurfperch, surffish, surf_fish\nrainbow_seaperch, rainbow_perch, Hipsurus_caryi\nbigeye\ncatalufa, Priacanthus_arenatus\ncardinalfish\nflame_fish, flamefish, Apogon_maculatus\ntilefish, Lopholatilus_chamaeleonticeps\nbluefish, Pomatomus_saltatrix\ncobia, Rachycentron_canadum, sergeant_fish\nremora, suckerfish, sucking_fish\nsharksucker, Echeneis_naucrates\nwhale_sucker, whalesucker, Remilegia_australis\ncarangid_fish, carangid\njack\ncrevalle_jack, jack_crevalle, Caranx_hippos\nyellow_jack, Caranx_bartholomaei\nrunner, blue_runner, Caranx_crysos\nrainbow_runner, Elagatis_bipinnulata\nleatherjacket, leatherjack\nthreadfish, thread-fish, Alectis_ciliaris\nmoonfish, Atlantic_moonfish, horsefish, horsehead, horse-head, dollarfish, Selene_setapinnis\nlookdown, lookdown_fish, Selene_vomer\namberjack, amberfish\nyellowtail, Seriola_dorsalis\nkingfish, Seriola_grandis\npompano\nFlorida_pompano, Trachinotus_carolinus\npermit, Trachinotus_falcatus\nscad\nhorse_mackerel, jack_mackerel, Spanish_mackerel, saurel, Trachurus_symmetricus\nhorse_mackerel, saurel, Trachurus_trachurus\nbigeye_scad, big-eyed_scad, goggle-eye, Selar_crumenophthalmus\nmackerel_scad, mackerel_shad, Decapterus_macarellus\nround_scad, cigarfish, quiaquia, Decapterus_punctatus\ndolphinfish, dolphin, mahimahi\nCoryphaena_hippurus\nCoryphaena_equisetis\npomfret, Brama_raii\ncharacin, characin_fish, characid\ntetra\ncardinal_tetra, Paracheirodon_axelrodi\npiranha, pirana, caribe\ncichlid, cichlid_fish\nbolti, Tilapia_nilotica\nsnapper\nred_snapper, Lutjanus_blackfordi\ngrey_snapper, gray_snapper, mangrove_snapper, Lutjanus_griseus\nmutton_snapper, 
muttonfish, Lutjanus_analis\nschoolmaster, Lutjanus_apodus\nyellowtail, yellowtail_snapper, Ocyurus_chrysurus\ngrunt\nmargate, Haemulon_album\nSpanish_grunt, Haemulon_macrostomum\ntomtate, Haemulon_aurolineatum\ncottonwick, Haemulon_malanurum\nsailor's-choice, sailors_choice, Haemulon_parra\nporkfish, pork-fish, Anisotremus_virginicus\npompon, black_margate, Anisotremus_surinamensis\npigfish, hogfish, Orthopristis_chrysopterus\nsparid, sparid_fish\nsea_bream, bream\nporgy\nred_porgy, Pagrus_pagrus\nEuropean_sea_bream, Pagellus_centrodontus\nAtlantic_sea_bream, Archosargus_rhomboidalis\nsheepshead, Archosargus_probatocephalus\npinfish, sailor's-choice, squirrelfish, Lagodon_rhomboides\nsheepshead_porgy, Calamus_penna\nsnapper, Chrysophrys_auratus\nblack_bream, Chrysophrys_australis\nscup, northern_porgy, northern_scup, Stenotomus_chrysops\nscup, southern_porgy, southern_scup, Stenotomus_aculeatus\nsciaenid_fish, sciaenid\nstriped_drum, Equetus_pulcher\njackknife-fish, Equetus_lanceolatus\nsilver_perch, mademoiselle, Bairdiella_chrysoura\nred_drum, channel_bass, redfish, Sciaenops_ocellatus\nmulloway, jewfish, Sciaena_antarctica\nmaigre, maiger, Sciaena_aquila\ncroaker\nAtlantic_croaker, Micropogonias_undulatus\nyellowfin_croaker, surffish, surf_fish, Umbrina_roncador\nwhiting\nkingfish\nking_whiting, Menticirrhus_americanus\nnorthern_whiting, Menticirrhus_saxatilis\ncorbina, Menticirrhus_undulatus\nwhite_croaker, chenfish, kingfish, Genyonemus_lineatus\nwhite_croaker, queenfish, Seriphus_politus\nsea_trout\nweakfish, Cynoscion_regalis\nspotted_weakfish, spotted_sea_trout, spotted_squeateague, Cynoscion_nebulosus\nmullet\ngoatfish, red_mullet, surmullet, Mullus_surmuletus\nred_goatfish, Mullus_auratus\nyellow_goatfish, Mulloidichthys_martinicus\nmullet, grey_mullet, gray_mullet\nstriped_mullet, Mugil_cephalus\nwhite_mullet, Mugil_curema\nliza, Mugil_liza\nsilversides, silverside\njacksmelt, Atherinopsis_californiensis\nbarracuda\ngreat_barracuda, 
Sphyraena_barracuda\nsweeper\nsea_chub\nBermuda_chub, rudderfish, Kyphosus_sectatrix\nspadefish, angelfish, Chaetodipterus_faber\nbutterfly_fish\nchaetodon\nangelfish\nrock_beauty, Holocanthus_tricolor\ndamselfish, demoiselle\nbeaugregory, Pomacentrus_leucostictus\nanemone_fish\nclown_anemone_fish, Amphiprion_percula\nsergeant_major, Abudefduf_saxatilis\nwrasse\npigfish, giant_pigfish, Achoerodus_gouldii\nhogfish, hog_snapper, Lachnolaimus_maximus\nslippery_dick, Halicoeres_bivittatus\npuddingwife, pudding-wife, Halicoeres_radiatus\nbluehead, Thalassoma_bifasciatum\npearly_razorfish, Hemipteronatus_novacula\ntautog, blackfish, Tautoga_onitis\ncunner, bergall, Tautogolabrus_adspersus\nparrotfish, polly_fish, pollyfish\nthreadfin\njawfish\nstargazer\nsand_stargazer\nblenny, combtooth_blenny\nshanny, Blennius_pholis\nMolly_Miller, Scartella_cristata\nclinid, clinid_fish\npikeblenny\nbluethroat_pikeblenny, Chaenopsis_ocellata\ngunnel, bracketed_blenny\nrock_gunnel, butterfish, Pholis_gunnellus\neelblenny\nwrymouth, ghostfish, Cryptacanthodes_maculatus\nwolffish, wolf_fish, catfish\nviviparous_eelpout, Zoarces_viviparus\nocean_pout, Macrozoarces_americanus\nsand_lance, sand_launce, sand_eel, launce\ndragonet\ngoby, gudgeon\nmudskipper, mudspringer\nsleeper, sleeper_goby\nflathead\narcherfish, Toxotes_jaculatrix\nsurgeonfish\ngempylid\nsnake_mackerel, Gempylus_serpens\nescolar, Lepidocybium_flavobrunneum\noilfish, Ruvettus_pretiosus\ncutlassfish, frost_fish, hairtail\nscombroid, scombroid_fish\nmackerel\ncommon_mackerel, shiner, Scomber_scombrus\nSpanish_mackerel, Scomber_colias\nchub_mackerel, tinker, Scomber_japonicus\nwahoo, Acanthocybium_solandri\nSpanish_mackerel\nking_mackerel, cavalla, cero, Scomberomorus_cavalla\nScomberomorus_maculatus\ncero, pintado, kingfish, Scomberomorus_regalis\nsierra, Scomberomorus_sierra\ntuna, tunny\nalbacore, long-fin_tunny, Thunnus_alalunga\nbluefin, bluefin_tuna, horse_mackerel, Thunnus_thynnus\nyellowfin, yellowfin_tuna, 
Thunnus_albacares\nbonito\nskipjack, Atlantic_bonito, Sarda_sarda\nChile_bonito, Chilean_bonito, Pacific_bonito, Sarda_chiliensis\nskipjack, skipjack_tuna, Euthynnus_pelamis\nbonito, oceanic_bonito, Katsuwonus_pelamis\nswordfish, Xiphias_gladius\nsailfish\nAtlantic_sailfish, Istiophorus_albicans\nbillfish\nmarlin\nblue_marlin, Makaira_nigricans\nblack_marlin, Makaira_mazara, Makaira_marlina\nstriped_marlin, Makaira_mitsukurii\nwhite_marlin, Makaira_albida\nspearfish\nlouvar, Luvarus_imperialis\ndollarfish, Poronotus_triacanthus\npalometa, California_pompano, Palometa_simillima\nharvestfish, Paprilus_alepidotus\ndriftfish\nbarrelfish, black_rudderfish, Hyperglyphe_perciformis\nclingfish\ntripletail\nAtlantic_tripletail, Lobotes_surinamensis\nPacific_tripletail, Lobotes_pacificus\nmojarra\nyellowfin_mojarra, Gerres_cinereus\nsilver_jenny, Eucinostomus_gula\nwhiting\nganoid, ganoid_fish\nbowfin, grindle, dogfish, Amia_calva\npaddlefish, duckbill, Polyodon_spathula\nChinese_paddlefish, Psephurus_gladis\nsturgeon\nPacific_sturgeon, white_sturgeon, Sacramento_sturgeon, Acipenser_transmontanus\nbeluga, hausen, white_sturgeon, Acipenser_huso\ngar, garfish, garpike, billfish, Lepisosteus_osseus\nscorpaenoid, scorpaenoid_fish\nscorpaenid, scorpaenid_fish\nscorpionfish, scorpion_fish, sea_scorpion\nplumed_scorpionfish, Scorpaena_grandicornis\nlionfish\nstonefish, Synanceja_verrucosa\nrockfish\ncopper_rockfish, Sebastodes_caurinus\nvermillion_rockfish, rasher, Sebastodes_miniatus\nred_rockfish, Sebastodes_ruberrimus\nrosefish, ocean_perch, Sebastodes_marinus\nbullhead\nmiller's-thumb\nsea_raven, Hemitripterus_americanus\nlumpfish, Cyclopterus_lumpus\nlumpsucker\npogge, armed_bullhead, Agonus_cataphractus\ngreenling\nkelp_greenling, Hexagrammos_decagrammus\npainted_greenling, convict_fish, convictfish, Oxylebius_pictus\nflathead\ngurnard\ntub_gurnard, yellow_gurnard, Trigla_lucerna\nsea_robin, searobin\nnorthern_sea_robin, Prionotus_carolinus\nflying_gurnard, flying_robin, 
butterflyfish\nplectognath, plectognath_fish\ntriggerfish\nqueen_triggerfish, Bessy_cerca, oldwench, oldwife, Balistes_vetula\nfilefish\nleatherjacket, leatherfish\nboxfish, trunkfish\ncowfish, Lactophrys_quadricornis\npuffer, pufferfish, blowfish, globefish\nspiny_puffer\nporcupinefish, porcupine_fish, Diodon_hystrix\nballoonfish, Diodon_holocanthus\nburrfish\nocean_sunfish, sunfish, mola, headfish\nsharptail_mola, Mola_lanceolata\nflatfish\nflounder\nrighteye_flounder, righteyed_flounder\nplaice, Pleuronectes_platessa\nEuropean_flatfish, Platichthys_flesus\nyellowtail_flounder, Limanda_ferruginea\nwinter_flounder, blackback_flounder, lemon_sole, Pseudopleuronectes_americanus\nlemon_sole, Microstomus_kitt\nAmerican_plaice, Hippoglossoides_platessoides\nhalibut, holibut\nAtlantic_halibut, Hippoglossus_hippoglossus\nPacific_halibut, Hippoglossus_stenolepsis\nlefteye_flounder, lefteyed_flounder\nsouthern_flounder, Paralichthys_lethostigmus\nsummer_flounder, Paralichthys_dentatus\nwhiff\nhorned_whiff, Citharichthys_cornutus\nsand_dab\nwindowpane, Scophthalmus_aquosus\nbrill, Scophthalmus_rhombus\nturbot, Psetta_maxima\ntonguefish, tongue-fish\nsole\nEuropean_sole, Solea_solea\nEnglish_sole, lemon_sole, Parophrys_vitulus\nhogchoker, Trinectes_maculatus\naba\nabacus\nabandoned_ship, derelict\nA_battery\nabattoir, butchery, shambles, slaughterhouse\nabaya\nAbbe_condenser\nabbey\nabbey\nabbey\nAbney_level\nabrader, abradant\nabrading_stone\nabutment\nabutment_arch\nacademic_costume\nacademic_gown, academic_robe, judge's_robe\naccelerator, throttle, throttle_valve\naccelerator, particle_accelerator, atom_smasher\naccelerator, accelerator_pedal, gas_pedal, gas, throttle, gun\naccelerometer\naccessory, accoutrement, accouterment\naccommodating_lens_implant, accommodating_IOL\naccommodation\naccordion, piano_accordion, squeeze_box\nacetate_disk, phonograph_recording_disk\nacetate_rayon, acetate\nachromatic_lens\nacoustic_delay_line, 
sonic_delay_line\nacoustic_device\nacoustic_guitar\nacoustic_modem\nacropolis\nacrylic\nacrylic, acrylic_paint\nactinometer\naction, action_mechanism\nactive_matrix_screen\nactuator\nadapter, adaptor\nadder\nadding_machine, totalizer, totaliser\naddressing_machine, Addressograph\nadhesive_bandage\nadit\nadjoining_room\nadjustable_wrench, adjustable_spanner\nadobe, adobe_brick\nadz, adze\naeolian_harp, aeolian_lyre, wind_harp\naerator\naerial_torpedo\naerosol, aerosol_container, aerosol_can, aerosol_bomb, spray_can\nAertex\nafghan\nAfro-wig\nafterburner\nafter-shave, after-shave_lotion\nagateware\nagglomerator\naglet, aiglet, aiguilette\naglet, aiglet\nagora, public_square\naigrette, aigret\naileron\nair_bag\nairbrake\nairbrush\nairbus\nair_compressor\nair_conditioner, air_conditioning\naircraft\naircraft_carrier, carrier, flattop, attack_aircraft_carrier\naircraft_engine\nair_cushion, air_spring\nairdock, hangar, repair_shed\nairfield, landing_field, flying_field, field\nair_filter, air_cleaner\nairfoil, aerofoil, control_surface, surface\nairframe\nair_gun, airgun, air_rifle\nair_hammer, jackhammer, pneumatic_hammer\nair_horn\nairing_cupboard\nairliner\nairmailer\nairplane, aeroplane, plane\nairplane_propeller, airscrew, prop\nairport, airdrome, aerodrome, drome\nair_pump, vacuum_pump\nair_search_radar\nairship, dirigible\nair_terminal, airport_terminal\nair-to-air_missile\nair-to-ground_missile, air-to-surface_missile\naisle\nAladdin's_lamp\nalarm, warning_device, alarm_system\nalarm_clock, alarm\nalb\nalcazar\nalcohol_thermometer, alcohol-in-glass_thermometer\nalehouse\nalembic\nalgometer\nalidade, alidad\nalidade, alidad\nA-line\nAllen_screw\nAllen_wrench\nalligator_wrench\nalms_dish, alms_tray\nalpaca\nalpenstock\naltar\naltar, communion_table, Lord's_table\naltarpiece, reredos\naltazimuth\nalternator\naltimeter\nAmati\nambulance\namen_corner\nAmerican_organ\nammeter\nammonia_clock\nammunition, ammo\namphibian, amphibious_aircraft\namphibian, 
amphibious_vehicle\namphitheater, amphitheatre, coliseum\namphitheater, amphitheatre\namphora\namplifier\nampulla\namusement_arcade\nanalog_clock\nanalog_computer, analogue_computer\nanalog_watch\nanalytical_balance, chemical_balance\nanalyzer, analyser\nanamorphosis, anamorphism\nanastigmat\nanchor, ground_tackle\nanchor_chain, anchor_rope\nanchor_light, riding_light, riding_lamp\nAND_circuit, AND_gate\nandiron, firedog, dog, dog-iron\nandroid, humanoid, mechanical_man\nanechoic_chamber\nanemometer, wind_gauge, wind_gage\naneroid_barometer, aneroid\nangiocardiogram\nangioscope\nangle_bracket, angle_iron\nangledozer\nankle_brace\nanklet, anklets, bobbysock, bobbysocks\nanklet\nankus\nanode\nanode\nanswering_machine\nantenna, aerial, transmitting_aerial\nanteroom, antechamber, entrance_hall, hall, foyer, lobby, vestibule\nantiaircraft, antiaircraft_gun, flak, flack, pom-pom, ack-ack, ack-ack_gun\nantiballistic_missile, ABM\nantifouling_paint\nanti-G_suit, G_suit\nantimacassar\nantiperspirant\nanti-submarine_rocket\nanvil\nao_dai\napadana\napartment, flat\napartment_building, apartment_house\naperture\naperture\napiary, bee_house\napparatus, setup\napparel, wearing_apparel, dress, clothes\napplecart\nappliance\nappliance, contraption, contrivance, convenience, gadget, gizmo, gismo, widget\napplicator, applier\nappointment, fitting\napron\napron_string\napse, apsis\naqualung, Aqua-Lung, scuba\naquaplane\naquarium, fish_tank, marine_museum\narabesque\narbor, arbour, bower, pergola\narcade, colonnade\narch\narchitecture\narchitrave\narch_support\narc_lamp, arc_light\narctic, galosh, golosh, rubber, gumshoe\narea\nareaway\nargyle, argyll\nark\narm\narmament\narmature\narmband\narmchair\narmet\narm_guard, arm_pad\narmhole\narmilla\narmlet, arm_band\narmoire\narmor, armour\narmored_car, armoured_car\narmored_car, armoured_car\narmored_personnel_carrier, armoured_personnel_carrier, APC\narmored_vehicle, armoured_vehicle\narmor_plate, armour_plate, armor_plating, 
plate_armor, plate_armour\narmory, armoury, arsenal\narmrest\narquebus, harquebus, hackbut, hagbut\narray\narray, raiment, regalia\narrester, arrester_hook\narrow\narsenal, armory, armoury\narterial_road\narthrogram\narthroscope\nartificial_heart\nartificial_horizon, gyro_horizon, flight_indicator\nartificial_joint\nartificial_kidney, hemodialyzer\nartificial_skin\nartillery, heavy_weapon, gun, ordnance\nartillery_shell\nartist's_loft\nart_school\nascot\nashcan, trash_can, garbage_can, wastebin, ash_bin, ash-bin, ashbin, dustbin, trash_barrel, trash_bin\nash-pan\nashtray\naspergill, aspersorium\naspersorium\naspirator\naspirin_powder, headache_powder\nassault_gun\nassault_rifle, assault_gun\nassegai, assagai\nassembly\nassembly\nassembly_hall\nassembly_plant\nastatic_coils\nastatic_galvanometer\nastrodome\nastrolabe\nastronomical_telescope\nastronomy_satellite\nathenaeum, atheneum\nathletic_sock, sweat_sock, varsity_sock\nathletic_supporter, supporter, suspensor, jockstrap, jock\natlas, telamon\natmometer, evaporometer\natom_bomb, atomic_bomb, A-bomb, fission_bomb, plutonium_bomb\natomic_clock\natomic_pile, atomic_reactor, pile, chain_reactor\natomizer, atomiser, spray, sprayer, nebulizer, nebuliser\natrium\nattache_case, attache\nattachment, bond\nattack_submarine\nattenuator\nattic\nattic_fan\nattire, garb, dress\naudio_amplifier\naudiocassette\naudio_CD, audio_compact_disc\naudiometer, sonometer\naudio_system, sound_system\naudiotape\naudiotape\naudiovisual, audiovisual_aid\nauditorium\nauger, gimlet, screw_auger, wimble\nautobahn\nautoclave, sterilizer, steriliser\nautofocus\nautogiro, autogyro, gyroplane\nautoinjector\nautoloader, self-loader\nautomat\nautomat\nautomatic_choke\nautomatic_firearm, automatic_gun, automatic_weapon\nautomatic_pistol, automatic\nautomatic_rifle, automatic, machine_rifle\nautomatic_transmission, automatic_drive\nautomation\nautomaton, robot, golem\nautomobile_engine\nautomobile_factory, auto_factory, car_factory\nautomobile_horn, 
car_horn, motor_horn, horn, hooter\nautopilot, automatic_pilot, robot_pilot\nautoradiograph\nautostrada\nauxiliary_boiler, donkey_boiler\nauxiliary_engine, donkey_engine\nauxiliary_pump, donkey_pump\nauxiliary_research_submarine\nauxiliary_storage, external_storage, secondary_storage\naviary, bird_sanctuary, volary\nawl\nawning, sunshade, sunblind\nax, axe\nax_handle, axe_handle\nax_head, axe_head\naxis, axis_of_rotation\naxle\naxle_bar\naxletree\nbabushka\nbaby_bed, baby's_bed\nbaby_buggy, baby_carriage, carriage, perambulator, pram, stroller, go-cart, pushchair, pusher\nbaby_grand, baby_grand_piano, parlor_grand, parlor_grand_piano, parlour_grand, parlour_grand_piano\nbaby_powder\nbaby_shoe\nback, backrest\nback\nbackbench\nbackboard\nbackboard, basketball_backboard\nbackbone\nback_brace\nbackgammon_board\nbackground, desktop, screen_background\nbackhoe\nbacklighting\nbackpack, back_pack, knapsack, packsack, rucksack, haversack\nbackpacking_tent, pack_tent\nbackplate\nback_porch\nbacksaw, back_saw\nbackscratcher\nbackseat\nbackspace_key, backspace, backspacer\nbackstairs\nbackstay\nbackstop\nbacksword\nbackup_system\nbadminton_court\nbadminton_equipment\nbadminton_racket, badminton_racquet, battledore\nbag\nbag, traveling_bag, travelling_bag, grip, suitcase\nbag, handbag, pocketbook, purse\nbaggage, luggage\nbaggage\nbaggage_car, luggage_van\nbaggage_claim\nbagpipe\nbailey\nbailey\nBailey_bridge\nbain-marie\nbait, decoy, lure\nbaize\nbakery, bakeshop, bakehouse\nbalaclava, balaclava_helmet\nbalalaika\nbalance\nbalance_beam, beam\nbalance_wheel, balance\nbalbriggan\nbalcony\nbalcony\nbaldachin\nbaldric, baldrick\nbale\nbaling_wire\nball\nball\nball_and_chain\nball-and-socket_joint\nballast, light_ballast\nball_bearing, needle_bearing, roller_bearing\nball_cartridge\nballcock, ball_cock\nballdress\nballet_skirt, tutu\nball_gown\nballistic_galvanometer\nballistic_missile\nballistic_pendulum\nballistocardiograph, cardiograph\nballoon\nballoon_bomb, 
Fugo\nballoon_sail\nballot_box\nballpark, park\nball-peen_hammer\nballpoint, ballpoint_pen, ballpen, Biro\nballroom, dance_hall, dance_palace\nball_valve\nbalsa_raft, Kon_Tiki\nbaluster\nbanana_boat\nband\nbandage, patch\nBand_Aid\nbandanna, bandana\nbandbox\nbanderilla\nbandoleer, bandolier\nbandoneon\nbandsaw, band_saw\nbandwagon\nbangalore_torpedo\nbangle, bauble, gaud, gewgaw, novelty, fallal, trinket\nbanjo\nbanner, streamer\nbannister, banister, balustrade, balusters, handrail\nbanquette\nbanyan, banian\nbaptismal_font, baptistry, baptistery, font\nbar\nbar\nbarbecue, barbeque\nbarbed_wire, barbwire\nbarbell\nbarber_chair\nbarbershop\nbarbette_carriage\nbarbican, barbacan\nbar_bit\nbareboat\nbarge, flatboat, hoy, lighter\nbarge_pole\nbaritone, baritone_horn\nbark, barque\nbar_magnet\nbar_mask\nbarn\nbarndoor\nbarn_door\nbarnyard\nbarograph\nbarometer\nbarong\nbarouche\nbar_printer\nbarrack\nbarrage_balloon\nbarrel, cask\nbarrel, gun_barrel\nbarrelhouse, honky-tonk\nbarrel_knot, blood_knot\nbarrel_organ, grind_organ, hand_organ, hurdy_gurdy, hurdy-gurdy, street_organ\nbarrel_vault\nbarrette\nbarricade\nbarrier\nbarroom, bar, saloon, ginmill, taproom\nbarrow, garden_cart, lawn_cart, wheelbarrow\nbascule\nbase, pedestal, stand\nbase, bag\nbaseball\nbaseball_bat, lumber\nbaseball_cap, jockey_cap, golf_cap\nbaseball_equipment\nbaseball_glove, glove, baseball_mitt, mitt\nbasement, cellar\nbasement\nbasic_point_defense_missile_system\nbasilica, Roman_basilica\nbasilica\nbasilisk\nbasin\nbasinet\nbasket, handbasket\nbasket, basketball_hoop, hoop\nbasketball\nbasketball_court\nbasketball_equipment\nbasket_weave\nbass\nbass_clarinet\nbass_drum, gran_casa\nbasset_horn\nbass_fiddle, bass_viol, bull_fiddle, double_bass, contrabass, string_bass\nbass_guitar\nbass_horn, sousaphone, tuba\nbassinet\nbassinet\nbassoon\nbaster\nbastinado\nbastion\nbastion, citadel\nbat\nbath\nbath_chair\nbathhouse, bagnio\nbathhouse, bathing_machine\nbathing_cap, 
swimming_cap\nbath_oil\nbathrobe\nbathroom, bath\nbath_salts\nbath_towel\nbathtub, bathing_tub, bath, tub\nbathyscaphe, bathyscaph, bathyscape\nbathysphere\nbatik\nbatiste\nbaton, wand\nbaton\nbaton\nbaton\nbattering_ram\nbatter's_box\nbattery, electric_battery\nbattery, stamp_battery\nbatting_cage, cage\nbatting_glove\nbatting_helmet\nbattle-ax, battle-axe\nbattle_cruiser\nbattle_dress\nbattlement, crenelation, crenellation\nbattleship, battlewagon\nbattle_sight, battlesight\nbay\nbay\nbayonet\nbay_rum\nbay_window, bow_window\nbazaar, bazar\nbazaar, bazar\nbazooka\nB_battery\nBB_gun\nbeach_house\nbeach_towel\nbeach_wagon, station_wagon, wagon, estate_car, beach_waggon, station_waggon, waggon\nbeachwear\nbeacon, lighthouse, beacon_light, pharos\nbeading_plane\nbeaker\nbeaker\nbeam\nbeam_balance\nbeanbag\nbeanie, beany\nbearing\nbearing_rein, checkrein\nbearing_wall\nbearskin, busby, shako\nbeater\nbeating-reed_instrument, reed_instrument, reed\nbeaver, castor\nbeaver\nBeckman_thermometer\nbed\nbed\nbed_and_breakfast, bed-and-breakfast\nbedclothes, bed_clothing, bedding\nBedford_cord\nbed_jacket\nbedpan\nbedpost\nbedroll\nbedroom, sleeping_room, sleeping_accommodation, chamber, bedchamber\nbedroom_furniture\nbedsitting_room, bedsitter, bedsit\nbedspread, bedcover, bed_cover, bed_covering, counterpane, spread\nbedspring\nbedstead, bedframe\nbeefcake\nbeehive, hive\nbeeper, pager\nbeer_barrel, beer_keg\nbeer_bottle\nbeer_can\nbeer_garden\nbeer_glass\nbeer_hall\nbeer_mat\nbeer_mug, stein\nbelaying_pin\nbelfry\nbell\nbell_arch\nbellarmine, longbeard, long-beard, greybeard\nbellbottom_trousers, bell-bottoms, bellbottom_pants\nbell_cote, bell_cot\nbell_foundry\nbell_gable\nbell_jar, bell_glass\nbellows\nbellpull\nbell_push\nbell_seat, balloon_seat\nbell_tent\nbell_tower\nbellyband\nbelt\nbelt, belt_ammunition, belted_ammunition\nbelt_buckle\nbelting\nbench\nbench_clamp\nbench_hook\nbench_lathe\nbench_press\nbender\nberet\nberlin\nBermuda_shorts, Jamaica_shorts\nberth, 
bunk, built_in_bed\nbesom\nBessemer_converter\nbethel\nbetting_shop\nbevatron\nbevel, bevel_square\nbevel_gear, pinion_and_crown_wheel, pinion_and_ring_gear\nB-flat_clarinet, licorice_stick\nbib\nbib-and-tucker\nbicorn, bicorne\nbicycle, bike, wheel, cycle\nbicycle-built-for-two, tandem_bicycle, tandem\nbicycle_chain\nbicycle_clip, trouser_clip\nbicycle_pump\nbicycle_rack\nbicycle_seat, saddle\nbicycle_wheel\nbidet\nbier\nbier\nbi-fold_door\nbifocals\nBig_Blue, BLU-82\nbig_board\nbight\nbikini, two-piece\nbikini_pants\nbilge\nbilge_keel\nbilge_pump\nbilge_well\nbill, peak, eyeshade, visor, vizor\nbill, billhook\nbillboard, hoarding\nbilliard_ball\nbilliard_room, billiard_saloon, billiard_parlor, billiard_parlour, billiard_hall\nbin\nbinder, ligature\nbinder, ring-binder\nbindery\nbinding, book_binding, cover, back\nbin_liner\nbinnacle\nbinoculars, field_glasses, opera_glasses\nbinocular_microscope\nbiochip\nbiohazard_suit\nbioscope\nbiplane\nbirch, birch_rod\nbirchbark_canoe, birchbark, birch_bark\nbirdbath\nbirdcage\nbirdcall\nbird_feeder, birdfeeder, feeder\nbirdhouse\nbird_shot, buckshot, duck_shot\nbiretta, berretta, birretta\nbishop\nbistro\nbit\nbit\nbite_plate, biteplate\nbitewing\nbitumastic\nblack\nblack\nblackboard, chalkboard\nblackboard_eraser\nblack_box\nblackface\nblackjack, cosh, sap\nblack_tie\nblackwash\nbladder\nblade\nblade, vane\nblade\nblank, dummy, blank_shell\nblanket, cover\nblast_furnace\nblasting_cap\nblazer, sport_jacket, sport_coat, sports_jacket, sports_coat\nblender, liquidizer, liquidiser\nblimp, sausage_balloon, sausage\nblind, screen\nblind_curve, blind_bend\nblindfold\nbling, bling_bling\nblinker, flasher\nblister_pack, bubble_pack\nblock\nblockade\nblockade-runner\nblock_and_tackle\nblockbuster\nblockhouse\nblock_plane\nbloodmobile\nbloomers, pants, drawers, knickers\nblouse\nblower\nblowtorch, torch, blowlamp\nblucher\nbludgeon\nblue\nblue_chip\nblunderbuss\nblunt_file\nboarding\nboarding_house, boardinghouse\nboardroom, 
council_chamber\nboards\nboat\nboater, leghorn, Panama, Panama_hat, sailor, skimmer, straw_hat\nboat_hook\nboathouse\nboatswain's_chair, bosun's_chair\nboat_train\nboatyard\nbobbin, spool, reel\nbobby_pin, hairgrip, grip\nbobsled, bobsleigh, bob\nbobsled, bobsleigh\nbocce_ball, bocci_ball, boccie_ball\nbodega\nbodice\nbodkin, threader\nbodkin\nbodkin\nbody\nbody_armor, body_armour, suit_of_armor, suit_of_armour, coat_of_mail, cataphract\nbody_lotion\nbody_stocking\nbody_plethysmograph\nbody_pad\nbodywork\nBofors_gun\nbogy, bogie, bogey\nboiler, steam_boiler\nboiling_water_reactor, BWR\nbolero\nbollard, bitt\nbolo, bolo_knife\nbolo_tie, bolo, bola_tie, bola\nbolt\nbolt, deadbolt\nbolt\nbolt_cutter\nbomb\nbombazine\nbomb_calorimeter, bomb\nbomber\nbomber_jacket\nbomblet, cluster_bomblet\nbomb_rack\nbombshell\nbomb_shelter, air-raid_shelter, bombproof\nbone-ash_cup, cupel, refractory_pot\nbone_china\nbones, castanets, clappers, finger_cymbals\nboneshaker\nbongo, bongo_drum\nbonnet, poke_bonnet\nbook\nbook_bag\nbookbindery\nbookcase\nbookend\nbookmark, bookmarker\nbookmobile\nbookshelf\nbookshop, bookstore, bookstall\nboom\nboom, microphone_boom\nboomerang, throwing_stick, throw_stick\nbooster, booster_rocket, booster_unit, takeoff_booster, takeoff_rocket\nbooster, booster_amplifier, booster_station, relay_link, relay_station, relay_transmitter\nboot\nboot\nboot_camp\nbootee, bootie\nbooth, cubicle, stall, kiosk\nbooth\nbooth\nboothose\nbootjack\nbootlace\nbootleg\nbootstrap\nbore_bit, borer, rock_drill, stone_drill\nboron_chamber\nborstal\nbosom\nBoston_rocker\nbota\nbottle\nbottle, feeding_bottle, nursing_bottle\nbottle_bank\nbottlebrush\nbottlecap\nbottle_opener\nbottling_plant\nbottom, freighter, merchantman, merchant_ship\nboucle\nboudoir\nboulle, boule, buhl\nbouncing_betty\nbouquet, corsage, posy, nosegay\nboutique, dress_shop\nboutonniere\nbow\nbow\nbow, bowknot\nbow_and_arrow\nbowed_stringed_instrument, string\nBowie_knife\nbowl\nbowl\nbowl\nbowler_hat, 
bowler, derby_hat, derby, plug_hat\nbowline, bowline_knot\nbowling_alley\nbowling_ball, bowl\nbowling_equipment\nbowling_pin, pin\nbowling_shoe\nbowsprit\nbowstring\nbow_tie, bow-tie, bowtie\nbox\nbox, loge\nbox, box_seat\nbox_beam, box_girder\nbox_camera, box_Kodak\nboxcar\nbox_coat\nboxing_equipment\nboxing_glove, glove\nbox_office, ticket_office, ticket_booth\nbox_spring\nbox_wrench, box_end_wrench\nbrace, bracing\nbrace, braces, orthodontic_braces\nbrace\nbrace, suspender, gallus\nbrace_and_bit\nbracelet, bangle\nbracer, armguard\nbrace_wrench\nbracket, wall_bracket\nbradawl, pricker\nbrake\nbrake\nbrake_band\nbrake_cylinder, hydraulic_brake_cylinder, master_cylinder\nbrake_disk\nbrake_drum, drum\nbrake_lining\nbrake_pad\nbrake_pedal\nbrake_shoe, shoe, skid\nbrake_system, brakes\nbrass, brass_instrument\nbrass, memorial_tablet, plaque\nbrass\nbrassard\nbrasserie\nbrassie\nbrassiere, bra, bandeau\nbrass_knucks, knucks, brass_knuckles, knuckles, knuckle_duster\nbrattice\nbrazier, brasier\nbreadbasket\nbread-bin, breadbox\nbread_knife\nbreakable\nbreakfast_area, breakfast_nook\nbreakfast_table\nbreakwater, groin, groyne, mole, bulwark, seawall, jetty\nbreast_drill\nbreast_implant\nbreastplate, aegis, egis\nbreast_pocket\nbreathalyzer, breathalyser\nbreechblock, breech_closer\nbreechcloth, breechclout, loincloth\nbreeches, knee_breeches, knee_pants, knickerbockers, knickers\nbreeches_buoy\nbreechloader\nbreeder_reactor\nBren, Bren_gun\nbrewpub\nbrick\nbrickkiln\nbricklayer's_hammer\nbrick_trowel, mason's_trowel\nbrickwork\nbridal_gown, wedding_gown, wedding_dress\nbridge, span\nbridge, nosepiece\nbridle\nbridle_path, bridle_road\nbridoon\nbriefcase\nbriefcase_bomb\nbriefcase_computer\nbriefs, Jockey_shorts\nbrig\nbrig\nbrigandine\nbrigantine, hermaphrodite_brig\nbrilliantine\nbrilliant_pebble\nbrim\nbristle_brush\nbritches\nbroad_arrow\nbroadax, broadaxe\nbrochette\nbroadcaster, 
spreader\nbroadcloth\nbroadcloth\nbroad_hatchet\nbroadloom\nbroadside\nbroadsword\nbrocade\nbrogan, brogue, clodhopper, work_shoe\nbroiler\nbroken_arch\nbronchoscope\nbroom\nbroom_closet\nbroomstick, broom_handle\nbrougham\nBrowning_automatic_rifle, BAR\nBrowning_machine_gun, Peacemaker\nbrownstone\nbrunch_coat\nbrush\nBrussels_carpet\nBrussels_lace\nbubble\nbubble_chamber\nbubble_jet_printer, bubble-jet_printer, bubblejet\nbuckboard\nbucket, pail\nbucket_seat\nbucket_shop\nbuckle\nbuckram\nbucksaw\nbuckskins\nbuff, buffer\nbuffer, polisher\nbuffer, buffer_storage, buffer_store\nbuffet, counter, sideboard\nbuffing_wheel\nbuggy, roadster\nbugle\nbuilding, edifice\nbuilding_complex, complex\nbulldog_clip, alligator_clip\nbulldog_wrench\nbulldozer, dozer\nbullet, slug\nbulletproof_vest\nbullet_train, bullet\nbullhorn, loud_hailer, loud-hailer\nbullion\nbullnose, bullnosed_plane\nbullpen, detention_cell, detention_centre\nbullpen\nbullring\nbulwark\nbumboat\nbumper\nbumper\nbumper_car, Dodgem\nbumper_guard\nbumper_jack\nbundle, sheaf\nbung, spile\nbungalow, cottage\nbungee, bungee_cord\nbunghole\nbunk\nbunk, feed_bunk\nbunk_bed, bunk\nbunker, sand_trap, trap\nbunker, dugout\nbunker\nbunsen_burner, bunsen, etna\nbunting\nbur, burr\nBurberry\nburette, buret\nburglar_alarm\nburial_chamber, sepulcher, sepulchre, sepulture\nburial_garment\nburial_mound, grave_mound, barrow, tumulus\nburin\nburqa, burka\nburlap, gunny\nburn_bag\nburner\nburnous, burnoose, burnouse\nburp_gun, machine_pistol\nburr\nbus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger_vehicle\nbushel_basket\nbushing, cylindrical_lining\nbush_jacket\nbusiness_suit\nbuskin, combat_boot, desert_boot, half_boot, top_boot\nbustier\nbustle\nbutcher_knife\nbutcher_shop, meat_market\nbutter_dish\nbutterfly_valve\nbutter_knife\nbutt_hinge\nbutt_joint, butt\nbutton\nbuttonhook\nbuttress, buttressing\nbutt_shaft\nbutt_weld, butt-weld\nbuzz_bomb, robot_bomb, flying_bomb, 
doodlebug, V-1\nbuzzer\nBVD, BVD's\nbypass_condenser, bypass_capacitor\nbyway, bypath, byroad\ncab, hack, taxi, taxicab\ncab, cabriolet\ncab\ncabana\ncabaret, nightclub, night_club, club, nightspot\ncaber\ncabin\ncabin\ncabin_car, caboose\ncabin_class, second_class, economy_class\ncabin_cruiser, cruiser, pleasure_boat, pleasure_craft\ncabinet\ncabinet, console\ncabinet, locker, storage_locker\ncabinetwork\ncabin_liner\ncable, cable_television, cable_system, cable_television_service\ncable, line, transmission_line\ncable_car, car\ncache, memory_cache\ncaddy, tea_caddy\ncaesium_clock\ncafe, coffeehouse, coffee_shop, coffee_bar\ncafeteria\ncafeteria_tray\ncaff\ncaftan, kaftan\ncaftan, kaftan\ncage, coop\ncage\ncagoule\ncaisson\ncalash, caleche, calash_top\ncalceus\ncalcimine\ncalculator, calculating_machine\ncaldron, cauldron\ncalico\ncaliper, calliper\ncall-board\ncall_center, call_centre\ncaller_ID\ncalliope, steam_organ\ncalorimeter\ncalpac, calpack, kalpac\ncamail, aventail, ventail\ncamber_arch\ncambric\ncamcorder\ncamel's_hair, camelhair\ncamera, photographic_camera\ncamera_lens, optical_lens\ncamera_lucida\ncamera_obscura\ncamera_tripod\ncamise\ncamisole\ncamisole, underbodice\ncamlet\ncamouflage\ncamouflage, camo\ncamp, encampment, cantonment, bivouac\ncamp\ncamp, refugee_camp\ncampaign_hat\ncampanile, belfry\ncamp_chair\ncamper, camping_bus, motor_home\ncamper_trailer\ncampstool\ncamshaft\ncan, tin, tin_can\ncanal\ncanal_boat, narrow_boat, narrowboat\ncandelabrum, candelabra\ncandid_camera\ncandle, taper, wax_light\ncandlepin\ncandlesnuffer\ncandlestick, candle_holder\ncandlewick\ncandy_thermometer\ncane\ncane\ncangue\ncanister, cannister, tin\ncannery\ncannikin\ncannikin\ncannon\ncannon\ncannon\ncannon\ncannonball, cannon_ball, round_shot\ncanoe\ncan_opener, tin_opener\ncanopic_jar, canopic_vase\ncanopy\ncanopy\ncanopy\ncanteen\ncanteen\ncanteen\ncanteen, mobile_canteen\ncanteen\ncant_hook\ncantilever\ncantilever_bridge\ncantle\nCanton_crepe\ncanvas, 
canvass\ncanvas, canvass\ncanvas_tent, canvas, canvass\ncap\ncap\ncap\ncapacitor, capacitance, condenser, electrical_condenser\ncaparison, trapping, housing\ncape, mantle\ncapital_ship\ncapitol\ncap_opener\ncapote, hooded_cloak\ncapote, hooded_coat\ncap_screw\ncapstan\ncapstone, copestone, coping_stone, stretcher\ncapsule\ncaptain's_chair\ncar, auto, automobile, machine, motorcar\ncar, railcar, railway_car, railroad_car\ncar, elevator_car\ncarabiner, karabiner, snap_ring\ncarafe, decanter\ncaravansary, caravanserai, khan, caravan_inn\ncar_battery, automobile_battery\ncarbine\ncar_bomb\ncarbon_arc_lamp, carbon_arc\ncarboy\ncarburetor, carburettor\ncar_carrier\ncardcase\ncardiac_monitor, heart_monitor\ncardigan\ncard_index, card_catalog, card_catalogue\ncardiograph, electrocardiograph\ncardioid_microphone\ncar_door\ncardroom\ncard_table\ncard_table\ncar-ferry\ncargo_area, cargo_deck, cargo_hold, hold, storage_area\ncargo_container\ncargo_door\ncargo_hatch\ncargo_helicopter\ncargo_liner\ncargo_ship, cargo_vessel\ncarillon\ncar_mirror\ncaroche\ncarousel, carrousel, merry-go-round, roundabout, whirligig\ncarpenter's_hammer, claw_hammer, clawhammer\ncarpenter's_kit, tool_kit\ncarpenter's_level\ncarpenter's_mallet\ncarpenter's_rule\ncarpenter's_square\ncarpetbag\ncarpet_beater, rug_beater\ncarpet_loom\ncarpet_pad, rug_pad, underlay, underlayment\ncarpet_sweeper, sweeper\ncarpet_tack\ncarport, car_port\ncarrack, carack\ncarrel, carrell, cubicle, stall\ncarriage, equipage, rig\ncarriage\ncarriage_bolt\ncarriageway\ncarriage_wrench\ncarrick_bend\ncarrier\ncarryall, holdall, tote, tote_bag\ncarrycot\ncar_seat\ncart\ncar_tire, automobile_tire, auto_tire, rubber_tire\ncarton\ncartouche, cartouch\ncar_train\ncartridge\ncartridge, pickup\ncartridge_belt\ncartridge_extractor, cartridge_remover, extractor\ncartridge_fuse\ncartridge_holder, cartridge_clip, clip, magazine\ncartwheel\ncarving_fork\ncarving_knife\ncar_wheel\ncaryatid\ncascade_liquefier\ncascade_transformer\ncase\ncase, 
display_case, showcase, vitrine\ncase, compositor's_case, typesetter's_case\ncasein_paint, casein\ncase_knife, sheath_knife\ncase_knife\ncasement\ncasement_window\ncasern\ncase_shot, canister, canister_shot\ncash_bar\ncashbox, money_box, till\ncash_machine, cash_dispenser, automated_teller_machine, automatic_teller_machine, automated_teller, automatic_teller, ATM\ncashmere\ncash_register, register\ncasing, case\ncasino, gambling_casino\ncasket, jewel_casket\ncasque\ncasquet, casquetel\nCassegrainian_telescope, Gregorian_telescope\ncasserole\ncassette\ncassette_deck\ncassette_player\ncassette_recorder\ncassette_tape\ncassock\ncast, plaster_cast, plaster_bandage\ncaster, castor\ncaster, castor\ncastle\ncastle, rook\ncatacomb\ncatafalque\ncatalytic_converter\ncatalytic_cracker, cat_cracker\ncatamaran\ncatapult, arbalest, arbalist, ballista, bricole, mangonel, onager, trebuchet, trebucket\ncatapult, launcher\ncatboat\ncat_box\ncatch\ncatchall\ncatcher's_mask\ncatchment\nCaterpillar, cat\ncathedra, bishop's_throne\ncathedral\ncathedral, duomo\ncatheter\ncathode\ncathode-ray_tube, CRT\ncat-o'-nine-tails, cat\ncat's-paw\ncatsup_bottle, ketchup_bottle\ncattle_car\ncattle_guard, cattle_grid\ncattleship, cattle_boat\ncautery, cauterant\ncavalier_hat, slouch_hat\ncavalry_sword, saber, sabre\ncavetto\ncavity_wall\nC_battery\nC-clamp\nCD_drive\nCD_player\nCD-R, compact_disc_recordable, CD-WO, compact_disc_write-once\nCD-ROM, compact_disc_read-only_memory\nCD-ROM_drive\ncedar_chest\nceiling\ncelesta\ncell, electric_cell\ncell, jail_cell, prison_cell\ncellar, wine_cellar\ncellblock, ward\ncello, violoncello\ncellophane\ncellular_telephone, cellular_phone, cellphone, cell, mobile_phone\ncellulose_tape, Scotch_tape, Sellotape\ncenotaph, empty_tomb\ncenser, thurible\ncenter, centre\ncenter_punch\nCentigrade_thermometer\ncentral_processing_unit, CPU, C.P.U., central_processor, processor, mainframe\ncentrifugal_pump\ncentrifuge, extractor, 
separator\nceramic\nceramic_ware\ncereal_bowl\ncereal_box\ncerecloth\ncesspool, cesspit, sink, sump\nchachka, tsatske, tshatshke, tchotchke\nchador, chadar, chaddar, chuddar\nchafing_dish\nchain\nchain\nchainlink_fence\nchain_mail, ring_mail, mail, chain_armor, chain_armour, ring_armor, ring_armour\nchain_printer\nchain_saw, chainsaw\nchain_store\nchain_tongs\nchain_wrench\nchair\nchair\nchair_of_state\nchairlift, chair_lift\nchaise, shay\nchaise_longue, chaise, daybed\nchalet\nchalice, goblet\nchalk\nchallis\nchamberpot, potty, thunder_mug\nchambray\nchamfer_bit\nchamfer_plane\nchamois_cloth\nchancel, sanctuary, bema\nchancellery\nchancery\nchandelier, pendant, pendent\nchandlery\nchanfron, chamfron, testiere, frontstall, front-stall\nchanter, melody_pipe\nchantry\nchap\nchapel\nchapterhouse, fraternity_house, frat_house\nchapterhouse\ncharacter_printer, character-at-a-time_printer, serial_printer\ncharcuterie\ncharge-exchange_accelerator\ncharger, battery_charger\nchariot\nchariot\ncharnel_house, charnel\nchassis\nchassis\nchasuble\nchateau\nchatelaine\nchecker, chequer\ncheckout, checkout_counter\ncheekpiece\ncheeseboard, cheese_tray\ncheesecloth\ncheese_cutter\ncheese_press\nchemical_bomb, gas_bomb\nchemical_plant\nchemical_reactor\nchemise, sack, shift\nchemise, shimmy, shift, slip, teddy\nchenille\nchessman, chess_piece\nchest\nchesterfield\nchest_of_drawers, chest, bureau, dresser\nchest_protector\ncheval-de-frise, chevaux-de-frise\ncheval_glass\nchicane\nchicken_coop, coop, hencoop, henhouse\nchicken_wire\nchicken_yard, hen_yard, chicken_run, fowl_run\nchiffon\nchiffonier, commode\nchild's_room\nchime, bell, gong\nchimney_breast\nchimney_corner, inglenook\nchina\nchina_cabinet, china_closet\nchinchilla\nChinese_lantern\nChinese_puzzle\nchinning_bar\nchino\nchino\nchin_rest\nchin_strap\nchintz\nchip, microchip, micro_chip, silicon_chip, microprocessor_chip\nchip, poker_chip\nchisel\nchlamys\nchoir\nchoir_loft\nchoke\nchoke, choke_coil, choking_coil\nchokey, 
choky\nchoo-choo\nchopine, platform\nchordophone\nChristmas_stocking\nchronograph\nchronometer\nchronoscope\nchuck\nchuck_wagon\nchukka, chukka_boot\nchurch, church_building\nchurch_bell\nchurch_hat\nchurch_key\nchurch_tower\nchuridars\nchurn, butter_churn\nciderpress\ncigar_band\ncigar_box\ncigar_cutter\ncigarette_butt\ncigarette_case\ncigarette_holder\ncigar_lighter, cigarette_lighter, pocket_lighter\ncinch, girth\ncinema, movie_theater, movie_theatre, movie_house, picture_palace\ncinquefoil\ncircle, round\ncirclet\ncircuit, electrical_circuit, electric_circuit\ncircuit_board, circuit_card, board, card, plug-in, add-in\ncircuit_breaker, breaker\ncircuitry\ncircular_plane, compass_plane\ncircular_saw, buzz_saw\ncircus_tent, big_top, round_top, top\ncistern\ncistern, water_tank\ncittern, cithern, cither, citole, gittern\ncity_hall\ncityscape\ncity_university\ncivies, civvies\ncivilian_clothing, civilian_dress, civilian_garb, plain_clothes\nclack_valve, clack, clapper_valve\nclamp, clinch\nclamshell, grapple\nclapper, tongue\nclapperboard\nclarence\nclarinet\nClark_cell, Clark_standard_cell\nclasp\nclasp_knife, jackknife\nclassroom, schoolroom\nclavichord\nclavier, Klavier\nclay_pigeon\nclaymore_mine, claymore\nclaymore\ncleaners, dry_cleaners\ncleaning_implement, cleaning_device, cleaning_equipment\ncleaning_pad\nclean_room, white_room\nclearway\ncleat\ncleat\ncleats\ncleaver, meat_cleaver, chopper\nclerestory, clearstory\nclevis\nclews\ncliff_dwelling\nclimbing_frame\nclinch\nclinch, clench\nclincher\nclinic\nclinical_thermometer, mercury-in-glass_clinical_thermometer\nclinker, clinker_brick\nclinometer, inclinometer\nclip\nclip_lead\nclip-on\nclipper\nclipper\nclipper, clipper_ship\ncloak\ncloak\ncloakroom, coatroom\ncloche\ncloche\nclock\nclock_pendulum\nclock_radio\nclock_tower\nclockwork\nclog, geta, patten, sabot\ncloisonne\ncloister\nclosed_circuit, loop\nclosed-circuit_television\nclosed_loop, closed-loop_system\ncloset\ncloseup_lens\ncloth_cap, 
flat_cap\ncloth_covering\nclothesbrush\nclothes_closet, clothespress\nclothes_dryer, clothes_drier\nclothes_hamper, laundry_basket, clothes_basket, voider\nclotheshorse\nclothespin, clothes_pin, clothes_peg\nclothes_tree, coat_tree, coat_stand\nclothing, article_of_clothing, vesture, wear, wearable, habiliment\nclothing_store, haberdashery, haberdashery_store, mens_store\nclout_nail, clout\nclove_hitch\nclub_car, lounge_car\nclubroom\ncluster_bomb\nclutch\nclutch, clutch_pedal\nclutch_bag, clutch\ncoach, four-in-hand, coach-and-four\ncoach_house, carriage_house, remise\ncoal_car\ncoal_chute\ncoal_house\ncoal_shovel\ncoaming\ncoaster_brake\ncoat\ncoat_button\ncoat_closet\ncoatdress\ncoatee\ncoat_hanger, clothes_hanger, dress_hanger\ncoating, coat\ncoating\ncoat_of_paint\ncoatrack, coat_rack, hatrack\ncoattail\ncoaxial_cable, coax, coax_cable\ncobweb\ncobweb\nCockcroft_and_Walton_accelerator, Cockcroft-Walton_accelerator, Cockcroft_and_Walton_voltage_multiplier, Cockcroft-Walton_voltage_multiplier\ncocked_hat\ncockhorse\ncockleshell\ncockpit\ncockpit\ncockpit\ncockscomb, coxcomb\ncocktail_dress, sheath\ncocktail_lounge\ncocktail_shaker\ncocotte\ncodpiece\ncoelostat\ncoffee_can\ncoffee_cup\ncoffee_filter\ncoffee_maker\ncoffee_mill, coffee_grinder\ncoffee_mug\ncoffeepot\ncoffee_stall\ncoffee_table, cocktail_table\ncoffee_urn\ncoffer\nCoffey_still\ncoffin, casket\ncog, sprocket\ncoif\ncoil, spiral, volute, whorl, helix\ncoil\ncoil\ncoil_spring, volute_spring\ncoin_box\ncolander, cullender\ncold_cathode\ncold_chisel, set_chisel\ncold_cream, coldcream, face_cream, vanishing_cream\ncold_frame\ncollar, neckband\ncollar\ncollege\ncollet, collet_chuck\ncollider\ncolliery, pit\ncollimator\ncollimator\ncologne, cologne_water, eau_de_cologne\ncolonnade\ncolonoscope\ncolorimeter, tintometer\ncolors, colours\ncolor_television, colour_television, color_television_system, colour_television_system, color_TV, colour_TV\ncolor_tube, colour_tube, color_television_tube, 
colour_television_tube, color_TV_tube, colour_TV_tube\ncolor_wash, colour_wash\nColt\ncolter, coulter\ncolumbarium\ncolumbarium, cinerarium\ncolumn, pillar\ncolumn, pillar\ncomb\ncomb\ncomber\ncombination_lock\ncombination_plane\ncombine\ncomforter, pacifier, baby's_dummy, teething_ring\ncommand_module\ncommissary\ncommissary\ncommodity, trade_good, good\ncommon_ax, common_axe, Dayton_ax, Dayton_axe\ncommon_room\ncommunications_satellite\ncommunication_system\ncommunity_center, civic_center\ncommutator\ncommuter, commuter_train\ncompact, powder_compact\ncompact, compact_car\ncompact_disk, compact_disc, CD\ncompact-disk_burner, CD_burner\ncompanionway\ncompartment\ncompartment\ncompass\ncompass\ncompass_card, mariner's_compass\ncompass_saw\ncompound\ncompound_lens\ncompound_lever\ncompound_microscope\ncompress\ncompression_bandage, tourniquet\ncompressor\ncomputer, computing_machine, computing_device, data_processor, electronic_computer, information_processing_system\ncomputer_circuit\ncomputerized_axial_tomography_scanner, CAT_scanner\ncomputer_keyboard, keypad\ncomputer_monitor\ncomputer_network\ncomputer_screen, computer_display\ncomputer_store\ncomputer_system, computing_system, automatic_data_processing_system, ADP_system, ADPS\nconcentration_camp, stockade\nconcert_grand, concert_piano\nconcert_hall\nconcertina\nconcertina\nconcrete_mixer, cement_mixer\ncondensation_pump, diffusion_pump\ncondenser, optical_condenser\ncondenser\ncondenser\ncondenser_microphone, capacitor_microphone\ncondominium\ncondominium, condo\nconductor\ncone_clutch, cone_friction_clutch\nconfectionery, confectionary, candy_store\nconference_center, conference_house\nconference_room\nconference_table, council_table, council_board\nconfessional\nconformal_projection, orthomorphic_projection\ncongress_boot, congress_shoe, congress_gaiter\nconic_projection, conical_projection\nconnecting_rod\nconnecting_room\nconnection, connexion, connector, connecter, 
connective\nconning_tower\nconning_tower\nconservatory, hothouse, indoor_garden\nconservatory, conservatoire\nconsole\nconsole\nconsole_table, console\nconsulate\ncontact, tangency\ncontact, contact_lens\ncontainer\ncontainer_ship, containership, container_vessel\ncontainment\ncontrabassoon, contrafagotto, double_bassoon\ncontrol, controller\ncontrol_center\ncontrol_circuit, negative_feedback_circuit\ncontrol_key, command_key\ncontrol_panel, instrument_panel, control_board, board, panel\ncontrol_rod\ncontrol_room\ncontrol_system\ncontrol_tower\nconvector\nconvenience_store\nconvent\nconventicle, meetinghouse\nconverging_lens, convex_lens\nconverter, convertor\nconvertible\nconvertible, sofa_bed\nconveyance, transport\nconveyer_belt, conveyor_belt, conveyer, conveyor, transporter\ncooker\ncookfire\ncookhouse\ncookie_cutter\ncookie_jar, cooky_jar\ncookie_sheet, baking_tray\ncooking_utensil, cookware\ncookstove\ncoolant_system\ncooler, ice_chest\ncooling_system, cooling\ncooling_system, engine_cooling_system\ncooling_tower\ncoonskin_cap, coonskin\ncope\ncoping_saw\ncopperware\ncopyholder\ncoquille\ncoracle\ncorbel, truss\ncorbel_arch\ncorbel_step, corbie-step, corbiestep, crow_step\ncorbie_gable\ncord, corduroy\ncord, electric_cord\ncordage\ncords, corduroys\ncore\ncore_bit\ncore_drill\ncorer\ncork, bottle_cork\ncorker\ncorkscrew, bottle_screw\ncorncrib\ncorner, quoin\ncorner, nook\ncorner_post\ncornet, horn, trumpet, trump\ncornice\ncornice\ncornice, valance, valance_board, pelmet\ncorrectional_institution\ncorrugated_fastener, wiggle_nail\ncorselet, corslet\ncorset, girdle, stays\ncosmetic\ncosmotron\ncostume\ncostume\ncostume\ncostume\ncosy, tea_cosy, cozy, tea_cozy\ncot, camp_bed\ncottage_tent\ncotter, cottar\ncotter_pin\ncotton\ncotton_flannel, Canton_flannel\ncotton_mill\ncouch\ncouch\ncouchette\ncoude_telescope, coude_system\ncounter\ncounter, tabulator\ncounter\ncounterbore, countersink, countersink_bit\ncounter_tube\ncountry_house\ncountry_store, 
general_store, trading_post\ncoupe\ncoupling, coupler\ncourt, courtyard\ncourt\ncourt, courtroom\ncourt\nCourtelle\ncourthouse\ncourthouse\ncoverall\ncovered_bridge\ncovered_couch\ncovered_wagon, Conestoga_wagon, Conestoga, prairie_wagon, prairie_schooner\ncovering\ncoverlet\ncover_plate\ncowbarn, cowshed, cow_barn, cowhouse, byre\ncowbell\ncowboy_boot\ncowboy_hat, ten-gallon_hat\ncowhide\ncowl\ncow_pen, cattle_pen, corral\nCPU_board, mother_board\ncrackle, crackleware, crackle_china\ncradle\ncraft\ncramp, cramp_iron\ncrampon, crampoon, climbing_iron, climber\ncrampon, crampoon\ncrane\ncraniometer\ncrank, starter\ncrankcase\ncrankshaft\ncrash_barrier\ncrash_helmet\ncrate\ncravat\ncrayon, wax_crayon\ncrazy_quilt\ncream, ointment, emollient\ncream_pitcher, creamer\ncreche, foundling_hospital\ncreche\ncredenza, credence\ncreel\ncrematory, crematorium, cremation_chamber\ncrematory, crematorium\ncrepe, crape\ncrepe_de_Chine\ncrescent_wrench\ncretonne\ncrib, cot\ncrib\ncricket_ball\ncricket_bat, bat\ncricket_equipment\ncringle, eyelet, loop, grommet, grummet\ncrinoline\ncrinoline\ncrochet_needle, crochet_hook\ncrock, earthenware_jar\nCrock_Pot\ncrook, shepherd's_crook\nCrookes_radiometer\nCrookes_tube\ncroquet_ball\ncroquet_equipment\ncroquet_mallet\ncross\ncrossbar\ncrossbar\ncrossbar\ncrossbench\ncross_bit\ncrossbow\ncrosscut_saw, crosscut_handsaw, cutoff_saw\ncrossjack, mizzen_course\ncrosspiece\ncrotchet\ncroupier's_rake\ncrowbar, wrecking_bar, pry, pry_bar\ncrown, diadem\ncrown, crownwork, jacket, jacket_crown, cap\ncrown_jewels\ncrown_lens\ncrow's_nest\ncrucible, melting_pot\ncrucifix, rood, rood-tree\ncruet, crewet\ncruet-stand\ncruise_control\ncruise_missile\ncruiser\ncruiser, police_cruiser, patrol_car, police_car, prowl_car, squad_car\ncruise_ship, cruise_liner\ncrupper\ncruse\ncrusher\ncrutch\ncryometer\ncryoscope\ncryostat\ncrypt\ncrystal, watch_crystal, watch_glass\ncrystal_detector\ncrystal_microphone\ncrystal_oscillator, 
quartz_oscillator\ncrystal_set\ncubitiere\ncucking_stool, ducking_stool\ncuckoo_clock\ncuddy\ncudgel\ncue, cue_stick, pool_cue, pool_stick\ncue_ball\ncuff, turnup\ncuirass\ncuisse\ncul, cul_de_sac, dead_end\nculdoscope\ncullis\nculotte\ncultivator, tiller\nculverin\nculverin\nculvert\ncup\ncupboard, closet\ncup_hook\ncupola\ncupola\ncurb, curb_bit\ncurb_roof\ncurbstone, kerbstone\ncurette, curet\ncurler, hair_curler, roller, crimper\ncurling_iron\ncurrycomb\ncursor, pointer\ncurtain, drape, drapery, mantle, pall\ncustomhouse, customshouse\ncutaway, cutaway_drawing, cutaway_model\ncutlas, cutlass\ncutoff\ncutout\ncutter, cutlery, cutting_tool\ncutter\ncutting_implement\ncutting_room\ncutty_stool\ncutwork\ncybercafe\ncyclopean_masonry\ncyclostyle\ncyclotron\ncylinder\ncylinder, piston_chamber\ncylinder_lock\ncymbal\ndacha\nDacron, Terylene\ndado\ndado_plane\ndagger, sticker\ndairy, dairy_farm\ndais, podium, pulpit, rostrum, ambo, stump, soapbox\ndaisy_print_wheel, daisy_wheel\ndaisywheel_printer\ndam, dike, dyke\ndamask\ndampener, moistener\ndamper, muffler\ndamper_block, piano_damper\ndark_lantern, bull's-eye\ndarkroom\ndarning_needle, embroidery_needle\ndart\ndart\ndashboard, fascia\ndashiki, daishiki\ndash-pot\ndata_converter\ndata_input_device, input_device\ndata_multiplexer\ndata_system, information_system\ndavenport\ndavenport\ndavit\ndaybed, divan_bed\ndaybook, ledger\nday_nursery, day_care_center\nday_school\ndead_axle\ndeadeye\ndeadhead\ndeanery\ndeathbed\ndeath_camp\ndeath_house, death_row\ndeath_knell, death_bell\ndeath_seat\ndeck\ndeck\ndeck_chair, beach_chair\ndeck-house\ndeckle\ndeckle_edge, deckle\ndeclinometer, transit_declinometer\ndecoder\ndecolletage\ndecoupage\ndedicated_file_server\ndeep-freeze, Deepfreeze, deep_freezer, freezer\ndeerstalker\ndefense_system, defence_system\ndefensive_structure, defense, defence\ndefibrillator\ndefilade\ndeflector\ndelayed_action\ndelay_line\ndelft\ndelicatessen, deli, food_shop\ndelivery_truck, delivery_van, 
panel_truck\ndelta_wing\ndemijohn\ndemitasse\nden\ndenim, dungaree, jean\ndensimeter, densitometer\ndensitometer\ndental_appliance\ndental_floss, floss\ndental_implant\ndentist's_drill, burr_drill\ndenture, dental_plate, plate\ndeodorant, deodourant\ndepartment_store, emporium\ndeparture_lounge\ndepilatory, depilator, epilator\ndepressor\ndepth_finder\ndepth_gauge, depth_gage\nderrick\nderrick\nderringer\ndesk\ndesk_phone\ndesktop_computer\ndessert_spoon\ndestroyer, guided_missile_destroyer\ndestroyer_escort\ndetached_house, single_dwelling\ndetector, sensor, sensing_element\ndetector\ndetention_home, detention_house, house_of_detention, detention_camp\ndetonating_fuse\ndetonator, detonating_device, cap\ndeveloper\ndevice\nDewar_flask, Dewar\ndhoti\ndhow\ndial, telephone_dial\ndial\ndial\ndialog_box, panel\ndial_telephone, dial_phone\ndialyzer, dialysis_machine\ndiamante\ndiaper, nappy, napkin\ndiaper\ndiaphone\ndiaphragm, stop\ndiaphragm\ndiathermy_machine\ndibble, dibber\ndice_cup, dice_box\ndicer\ndickey, dickie, dicky, shirtfront\ndickey, dickie, dicky, dickey-seat, dickie-seat, dicky-seat\nDictaphone\ndie\ndiesel, diesel_engine, diesel_motor\ndiesel-electric_locomotive, diesel-electric\ndiesel-hydraulic_locomotive, diesel-hydraulic\ndiesel_locomotive\ndiestock\ndifferential_analyzer\ndifferential_gear, differential\ndiffuser, diffusor\ndiffuser, diffusor\ndigester\ndiggings, digs, domiciliation, lodgings, pad\ndigital-analog_converter, digital-to-analog_converter\ndigital_audiotape, DAT\ndigital_camera\ndigital_clock\ndigital_computer\ndigital_display, alphanumeric_display\ndigital_subscriber_line, DSL\ndigital_voltmeter\ndigital_watch\ndigitizer, digitiser, analog-digital_converter, analog-to-digital_converter\ndilator, dilater\ndildo\ndimity\ndimmer\ndiner\ndinette\ndinghy, dory, rowboat\ndining_area\ndining_car, diner, dining_compartment, buffet_car\ndining-hall\ndining_room, dining-room\ndining-room_furniture\ndining-room_table\ndining_table, 
board\ndinner_bell\ndinner_dress, dinner_gown, formal, evening_gown\ndinner_jacket, tux, tuxedo, black_tie\ndinner_napkin\ndinner_pail, dinner_bucket\ndinner_table\ndinner_theater, dinner_theatre\ndiode, semiconductor_diode, junction_rectifier, crystal_rectifier\ndiode, rectifying_tube, rectifying_valve\ndip\ndiplomatic_building\ndipole, dipole_antenna\ndipper\ndipstick\nDIP_switch, dual_inline_package_switch\ndirectional_antenna\ndirectional_microphone\ndirection_finder\ndirk\ndirndl\ndirndl\ndirty_bomb\ndischarge_lamp\ndischarge_pipe\ndisco, discotheque\ndiscount_house, discount_store, discounter, wholesale_house\ndiscus, saucer\ndisguise\ndish\ndish, dish_aerial, dish_antenna, saucer\ndishpan\ndish_rack\ndishrag, dishcloth\ndishtowel, dish_towel, tea_towel\ndishwasher, dish_washer, dishwashing_machine\ndisk, disc\ndisk_brake, disc_brake\ndisk_clutch\ndisk_controller\ndisk_drive, disc_drive, hard_drive, Winchester_drive\ndiskette, floppy, floppy_disk\ndisk_harrow, disc_harrow\ndispatch_case, dispatch_box\ndispensary\ndispenser\ndisplay, video_display\ndisplay_adapter, display_adaptor\ndisplay_panel, display_board, board\ndisplay_window, shop_window, shopwindow, show_window\ndisposal, electric_pig, garbage_disposal\ndisrupting_explosive, bursting_explosive\ndistaff\ndistillery, still\ndistributor, distributer, electrical_distributor\ndistributor_cam\ndistributor_cap\ndistributor_housing\ndistributor_point, breaker_point, point\nditch\nditch_spade, long-handled_spade\nditty_bag\ndivan\ndivan, diwan\ndive_bomber\ndiverging_lens, concave_lens\ndivided_highway, dual_carriageway\ndivider\ndiving_bell\ndivining_rod, dowser, dowsing_rod, waterfinder, water_finder\ndiving_suit, diving_dress\ndixie\nDixie_cup, paper_cup\ndock, dockage, docking_facility\ndoeskin\ndogcart\ndoggie_bag, doggy_bag\ndogsled, dog_sled, dog_sleigh\ndog_wrench\ndoily, doyley, doyly\ndoll, dolly\ndollhouse, doll's_house\ndolly\ndolman\ndolman, dolman_jacket\ndolman_sleeve\ndolmen, cromlech, 
portal_tomb\ndome\ndome, domed_stadium, covered_stadium\ndomino, half_mask, eye_mask\ndongle\ndonkey_jacket\ndoor\ndoor\ndoor\ndoorbell, bell, buzzer\ndoorframe, doorcase\ndoorjamb, doorpost\ndoorlock\ndoormat, welcome_mat\ndoornail\ndoorplate\ndoorsill, doorstep, threshold\ndoorstop, doorstopper\nDoppler_radar\ndormer, dormer_window\ndormer_window\ndormitory, dorm, residence_hall, hall, student_residence\ndormitory, dormitory_room, dorm_room\ndosemeter, dosimeter\ndossal, dossel\ndot_matrix_printer, matrix_printer, dot_printer\ndouble_bed\ndouble-bitted_ax, double-bitted_axe, Western_ax, Western_axe\ndouble_boiler, double_saucepan\ndouble-breasted_jacket\ndouble-breasted_suit\ndouble_door\ndouble_glazing\ndouble-hung_window\ndouble_knit\ndoubler\ndouble_reed\ndouble-reed_instrument, double_reed\ndoublet\ndoubletree\ndouche, douche_bag\ndovecote, columbarium, columbary\nDover's_powder\ndovetail, dovetail_joint\ndovetail_plane\ndowel, dowel_pin, joggle\ndownstage\ndrafting_instrument\ndrafting_table, drawing_table\nDragunov\ndrainage_ditch\ndrainage_system\ndrain_basket\ndrainplug\ndrape\ndrapery\ndrawbar\ndrawbridge, lift_bridge\ndrawer\ndrawers, underdrawers, shorts, boxers, boxershorts\ndrawing_chalk\ndrawing_room, withdrawing_room\ndrawing_room\ndrawknife, drawshave\ndrawstring_bag\ndray, camion\ndreadnought, dreadnaught\ndredge\ndredger\ndredging_bucket\ndress, frock\ndress_blues, dress_whites\ndresser\ndress_hat, high_hat, opera_hat, silk_hat, stovepipe, top_hat, topper, beaver\ndressing, medical_dressing\ndressing_case\ndressing_gown, robe-de-chambre, lounging_robe\ndressing_room\ndressing_sack, dressing_sacque\ndressing_table, dresser, vanity, toilet_table\ndress_rack\ndress_shirt, evening_shirt\ndress_suit, full_dress, tailcoat, tail_coat, tails, white_tie, white_tie_and_tails\ndress_uniform\ndrift_net\ndrill\nelectric_drill\ndrilling_platform, offshore_rig\ndrill_press\ndrill_rig, drilling_rig, oilrig, oil_rig\ndrinking_fountain, water_fountain, 
bubbler\ndrinking_vessel\ndrip_loop\ndrip_mat\ndrip_pan\ndripping_pan, drip_pan\ndrip_pot\ndrive\ndrive\ndrive_line, drive_line_system\ndriver, number_one_wood\ndriveshaft\ndriveway, drive, private_road\ndriving_iron, one_iron\ndriving_wheel\ndrogue, drogue_chute, drogue_parachute\ndrogue_parachute\ndrone, drone_pipe, bourdon\ndrone, pilotless_aircraft, radio-controlled_aircraft\ndrop_arch\ndrop_cloth\ndrop_curtain, drop_cloth, drop\ndrop_forge, drop_hammer, drop_press\ndrop-leaf_table\ndropper, eye_dropper\ndroshky, drosky\ndrove, drove_chisel\ndrugget\ndrugstore, apothecary's_shop, chemist's, chemist's_shop, pharmacy\ndrum, membranophone, tympan\ndrum, metal_drum\ndrum_brake\ndrumhead, head\ndrum_printer\ndrum_sander, electric_sander, sander, smoother\ndrumstick\ndry_battery\ndry-bulb_thermometer\ndry_cell\ndry_dock, drydock, graving_dock\ndryer, drier\ndry_fly\ndry_kiln\ndry_masonry\ndry_point\ndry_wall, dry-stone_wall\ndual_scan_display\nduck\nduckboard\nduckpin\ndudeen\nduffel, duffle\nduffel_bag, duffle_bag, duffel, duffle\nduffel_coat, duffle_coat\ndugout\ndugout_canoe, dugout, pirogue\ndulciana\ndulcimer\ndulcimer\ndumbbell\ndumb_bomb, gravity_bomb\ndumbwaiter, food_elevator\ndumdum, dumdum_bullet\ndumpcart\nDumpster\ndump_truck, dumper, tipper_truck, tipper_lorry, tip_truck, tipper\nDumpy_level\ndunce_cap, dunce's_cap, fool's_cap\ndune_buggy, beach_buggy\ndungeon\nduplex_apartment, duplex\nduplex_house, duplex, semidetached_house\nduplicator, copier\ndust_bag, vacuum_bag\ndustcloth, dustrag, duster\ndust_cover\ndust_cover, dust_sheet\ndustmop, dust_mop, dry_mop\ndustpan\nDutch_oven\nDutch_oven\ndwelling, home, domicile, abode, habitation, dwelling_house\ndye-works\ndynamo\ndynamometer, ergometer\nEames_chair\nearflap, earlap\nearly_warning_radar\nearly_warning_system\nearmuff\nearphone, earpiece, headphone, phone\nearplug\nearplug\nearthenware\nearthwork\neasel\neasy_chair, lounge_chair, overstuffed_chair\neaves\necclesiastical_attire, 
ecclesiastical_robe\nechinus\nechocardiograph\nedger\nedge_tool\nefficiency_apartment\negg-and-dart, egg-and-anchor, egg-and-tongue\neggbeater, eggwhisk\negg_timer\neiderdown, duvet, continental_quilt\neight_ball\nejection_seat, ejector_seat, capsule\nelastic\nelastic_bandage\nElastoplast\nelbow\nelbow_pad\nelectric, electric_automobile, electric_car\nelectrical_cable\nelectrical_contact\nelectrical_converter\nelectrical_device\nelectrical_system\nelectric_bell\nelectric_blanket\nelectric_chair, chair, death_chair, hot_seat\nelectric_clock\nelectric-discharge_lamp, gas-discharge_lamp\nelectric_fan, blower\nelectric_frying_pan\nelectric_furnace\nelectric_guitar\nelectric_hammer\nelectric_heater, electric_fire\nelectric_lamp\nelectric_locomotive\nelectric_meter, power_meter\nelectric_mixer\nelectric_motor\nelectric_organ, electronic_organ, Hammond_organ, organ\nelectric_range\nelectric_refrigerator, fridge\nelectric_toothbrush\nelectric_typewriter\nelectro-acoustic_transducer\nelectrode\nelectrodynamometer\nelectroencephalograph\nelectrograph\nelectrolytic, electrolytic_capacitor, electrolytic_condenser\nelectrolytic_cell\nelectromagnet\nelectrometer\nelectromyograph\nelectron_accelerator\nelectron_gun\nelectronic_balance\nelectronic_converter\nelectronic_device\nelectronic_equipment\nelectronic_fetal_monitor, electronic_foetal_monitor, fetal_monitor, foetal_monitor\nelectronic_instrument, electronic_musical_instrument\nelectronic_voltmeter\nelectron_microscope\nelectron_multiplier\nelectrophorus\nelectroscope\nelectrostatic_generator, electrostatic_machine, Wimshurst_machine, Van_de_Graaff_generator\nelectrostatic_printer\nelevator, lift\nelevator\nelevator_shaft\nembankment\nembassy\nembellishment\nemergency_room, ER\nemesis_basin\nemitter\nempty\nemulsion, photographic_emulsion\nenamel\nenamel\nenamelware\nencaustic\nencephalogram, pneumoencephalogram\nenclosure\nendoscope\nenergizer, energiser\nengine\nengine\nengineering, engine_room\nenginery\nEnglish_horn, 
cor_anglais\nEnglish_saddle, English_cavalry_saddle\nenlarger\nensemble\nensign\nentablature\nentertainment_center\nentrenching_tool, trenching_spade\nentrenchment, intrenchment\nenvelope\nenvelope\nenvelope, gasbag\neolith\nepauliere\nepee\nepergne\nepicyclic_train, epicyclic_gear_train\nepidiascope\nepilating_wax\nequalizer, equaliser\nequatorial\nequipment\nerasable_programmable_read-only_memory, EPROM\neraser\nerecting_prism\nerection\nErlenmeyer_flask\nescape_hatch\nescapement\nescape_wheel\nescarpment, escarp, scarp, protective_embankment\nescutcheon, scutcheon\nesophagoscope, oesophagoscope\nespadrille\nespalier\nespresso_maker\nespresso_shop\nestablishment\nestaminet\nestradiol_patch\netagere\netamine, etamin\netching\nethernet\nethernet_cable\nEton_jacket\netui\neudiometer\neuphonium\nevaporative_cooler\nevening_bag\nexercise_bike, exercycle\nexercise_device\nexhaust, exhaust_system\nexhaust_fan\nexhaust_valve\nexhibition_hall, exhibition_area\nExocet\nexpansion_bit, expansive_bit\nexpansion_bolt\nexplosive_detection_system, EDS\nexplosive_device\nexplosive_trace_detection, ETD\nexpress, limited\nextension, telephone_extension, extension_phone\nextension_cord\nexternal-combustion_engine\nexternal_drive\nextractor\neyebrow_pencil\neyecup, eyebath, eye_cup\neyeliner\neyepatch, patch\neyepiece, ocular\neyeshadow\nfabric, cloth, material, textile\nfacade, frontage, frontal\nface_guard\nface_mask\nfaceplate\nface_powder\nface_veil\nfacing, cladding\nfacing\nfacing, veneer\nfacsimile, facsimile_machine, fax\nfactory, mill, manufacturing_plant, manufactory\nfactory_ship\nfagot, faggot\nfagot_stitch, faggot_stitch\nFahrenheit_thermometer\nfaience\nfaille\nfairlead\nfairy_light\nfalchion\nfallboard, fall-board\nfallout_shelter\nfalse_face\nfalse_teeth\nfamily_room\nfan\nfan_belt\nfan_blade\nfancy_dress, masquerade, masquerade_costume\nfanion\nfanlight\nfanjet, fan-jet, fanjet_engine, turbojet, turbojet_engine, turbofan, turbofan_engine\nfanjet, fan-jet, turbofan, 
turbojet\nfanny_pack, butt_pack\nfan_tracery\nfan_vaulting\nfarm_building\nfarmer's_market, green_market, greenmarket\nfarmhouse\nfarm_machine\nfarmplace, farm-place, farmstead\nfarmyard\nfarthingale\nfastener, fastening, holdfast, fixing\nfast_reactor\nfat_farm\nfatigues\nfaucet, spigot\nfauld\nfauteuil\nfeather_boa, boa\nfeatheredge\nfedora, felt_hat, homburg, Stetson, trilby\nfeedback_circuit, feedback_loop\nfeedlot\nfell, felled_seam\nfelloe, felly\nfelt\nfelt-tip_pen, felt-tipped_pen, felt_tip, Magic_Marker\nfelucca\nfence, fencing\nfencing_mask, fencer's_mask\nfencing_sword\nfender, wing\nfender, buffer, cowcatcher, pilot\nFerris_wheel\nferrule, collet\nferry, ferryboat\nferule\nfestoon\nfetoscope, foetoscope\nfetter, hobble\nfez, tarboosh\nfiber, fibre, vulcanized_fiber\nfiber_optic_cable, fibre_optic_cable\nfiberscope\nfichu\nfiddlestick, violin_bow\nfield_artillery, field_gun\nfield_coil, field_winding\nfield-effect_transistor, FET\nfield-emission_microscope\nfield_glass, glass, spyglass\nfield_hockey_ball\nfield_hospital\nfield_house, sports_arena\nfield_lens\nfield_magnet\nfield-sequential_color_television, field-sequential_color_TV, field-sequential_color_television_system, field-sequential_color_TV_system\nfield_tent\nfieldwork\nfife\nfifth_wheel, spare\nfighter, fighter_aircraft, attack_aircraft\nfighting_chair\nfig_leaf\nfigure_eight, figure_of_eight\nfigure_loom, figured-fabric_loom\nfigure_skate\nfilament\nfilature\nfile\nfile, file_cabinet, filing_cabinet\nfile_folder\nfile_server\nfiligree, filagree, fillagree\nfilling\nfilm, photographic_film\nfilm, plastic_film\nfilm_advance\nfilter\nfilter\nfinder, viewfinder, view_finder\nfinery\nfine-tooth_comb, fine-toothed_comb\nfinger\nfingerboard\nfinger_bowl\nfinger_paint, fingerpaint\nfinger-painting\nfinger_plate, escutcheon, scutcheon\nfingerstall, cot\nfinish_coat, finishing_coat\nfinish_coat, finishing_coat\nfinisher\nfin_keel\nfipple\nfipple_flute, fipple_pipe, recorder, 
vertical_flute\nfire\nfire_alarm, smoke_alarm\nfirearm, piece, small-arm\nfire_bell\nfireboat\nfirebox\nfirebrick\nfire_control_radar\nfire_control_system\nfire_engine, fire_truck\nfire_extinguisher, extinguisher, asphyxiator\nfire_iron\nfireman's_ax, fireman's_axe\nfireplace, hearth, open_fireplace\nfire_screen, fireguard\nfire_tongs, coal_tongs\nfire_tower\nfirewall\nfiring_chamber, gun_chamber\nfiring_pin\nfirkin\nfirmer_chisel\nfirst-aid_kit\nfirst-aid_station\nfirst_base\nfirst_class\nfishbowl, fish_bowl, goldfish_bowl\nfisherman's_bend\nfisherman's_knot, true_lover's_knot, truelove_knot\nfisherman's_lure, fish_lure\nfishhook\nfishing_boat, fishing_smack, fishing_vessel\nfishing_gear, tackle, fishing_tackle, fishing_rig, rig\nfishing_rod, fishing_pole\nfish_joint\nfish_knife\nfishnet, fishing_net\nfish_slice\nfitment\nfixative\nfixer-upper\nflag\nflageolet, treble_recorder, shepherd's_pipe\nflagon\nflagpole, flagstaff\nflagship\nflail\nflambeau\nflamethrower\nflange, rim\nflannel\nflannel, gabardine, tweed, white\nflannelette\nflap, flaps\nflash, photoflash, flash_lamp, flashgun, flashbulb, flash_bulb\nflash\nflash_camera\nflasher\nflashlight, torch\nflashlight_battery\nflash_memory\nflask\nflat_arch, straight_arch\nflatbed\nflatbed_press, cylinder_press\nflat_bench\nflatcar, flatbed, flat\nflat_file\nflatlet\nflat_panel_display, FPD\nflats\nflat_tip_screwdriver\nfleece\nfleet_ballistic_missile_submarine\nfleur-de-lis, fleur-de-lys\nflight_simulator, trainer\nflintlock\nflintlock, firelock\nflip-flop, thong\nflipper, fin\nfloat, plasterer's_float\nfloating_dock, floating_dry_dock\nfloatplane, pontoon_plane\nflood, floodlight, flood_lamp, photoflood\nfloor, flooring\nfloor, level, storey, story\nfloor\nfloorboard\nfloor_cover, floor_covering\nfloor_joist\nfloor_lamp\nflophouse, dosshouse\nflorist, florist_shop, flower_store\nfloss\nflotsam, jetsam\nflour_bin\nflour_mill\nflowerbed, flower_bed, bed_of_flowers\nflugelhorn, 
fluegelhorn\nfluid_drive\nfluid_flywheel\nflume\nfluorescent_lamp\nfluoroscope, roentgenoscope\nflush_toilet, lavatory\nflute, transverse_flute\nflute, flute_glass, champagne_flute\nflux_applicator\nfluxmeter\nfly\nflying_boat\nflying_buttress, arc-boutant\nflying_carpet\nflying_jib\nfly_rod\nfly_tent\nflytrap\nflywheel\nfob, watch_chain, watch_guard\nfoghorn\nfoglamp\nfoil\nfold, sheepfold, sheep_pen, sheepcote\nfolder\nfolding_chair\nfolding_door, accordion_door\nfolding_saw\nfood_court\nfood_processor\nfood_hamper\nfoot\nfootage\nfootball\nfootball_helmet\nfootball_stadium\nfootbath\nfoot_brake\nfootbridge, overcrossing, pedestrian_bridge\nfoothold, footing\nfootlocker, locker\nfoot_rule\nfootstool, footrest, ottoman, tuffet\nfootwear, footgear\nfootwear\nforceps\nforce_pump\nfore-and-after\nfore-and-aft_sail\nforecastle, fo'c'sle\nforecourt\nforedeck\nfore_edge, foredge\nforeground\nforemast\nfore_plane\nforesail\nforestay\nforetop\nfore-topmast\nfore-topsail\nforge\nfork\nforklift\nformalwear, eveningwear, evening_dress, evening_clothes\nFormica\nfortification, munition\nfortress, fort\nforty-five\nFoucault_pendulum\nfoulard\nfoul-weather_gear\nfoundation_garment, foundation\nfoundry, metalworks\nfountain\nfountain_pen\nfour-in-hand\nfour-poster\nfour-pounder\nfour-stroke_engine, four-stroke_internal-combustion_engine\nfour-wheel_drive, 4WD\nfour-wheel_drive, 4WD\nfour-wheeler\nfowling_piece\nfoxhole, fox_hole\nfragmentation_bomb, antipersonnel_bomb, anti-personnel_bomb, daisy_cutter\nfrail\nfraise\nframe, framing\nframe\nframe_buffer\nframework\nFrancis_turbine\nfranking_machine\nfree_house\nfree-reed\nfree-reed_instrument\nfreewheel\nfreight_car\nfreight_elevator, service_elevator\nfreight_liner, liner_train\nfreight_train, rattler\nFrench_door\nFrench_horn, horn\nFrench_polish, French_polish_shellac\nFrench_roof\nFrench_window\nFresnel_lens\nfret\nfriary\nfriction_clutch\nfrieze\nfrieze\nfrigate\nfrigate\nfrill, flounce, ruffle, 
furbelow\nFrisbee\nfrock\nfrock_coat\nfrontlet, frontal\nfront_porch\nfront_projector\nfruit_machine\nfrying_pan, frypan, skillet\nfuel_filter\nfuel_gauge, fuel_indicator\nfuel_injection, fuel_injection_system\nfuel_system\nfull-dress_uniform\nfull_metal_jacket\nfull_skirt\nfumigator\nfuneral_home, funeral_parlor, funeral_parlour, funeral_chapel, funeral_church, funeral-residence\nfunnel\nfunny_wagon\nfur\nfur_coat\nfur_hat\nfurnace\nfurnace_lining, refractory\nfurnace_room\nfurnishing\nfurnishing, trappings\nfurniture, piece_of_furniture, article_of_furniture\nfur-piece\nfurrow\nfuse, electrical_fuse, safety_fuse\nfusee_drive, fusee\nfuselage\nfusil\nfustian\nfuton\ngabardine\ngable, gable_end, gable_wall\ngable_roof, saddle_roof, saddleback, saddleback_roof\ngadgetry\ngaff\ngaff\ngaff\ngaffsail, gaff-headed_sail\ngaff_topsail, fore-and-aft_topsail\ngag, muzzle\ngaiter\ngaiter\nGalilean_telescope\ngalleon\ngallery\ngallery, art_gallery, picture_gallery\ngalley, ship's_galley, caboose, cookhouse\ngalley\ngalley\ngallows\ngallows_tree, gallows-tree, gibbet, gallous\ngalvanometer\ngambling_house, gambling_den, gambling_hell, gaming_house\ngambrel, gambrel_roof\ngame\ngamebag\ngame_equipment\ngaming_table\ngamp, brolly\ngangplank, gangboard, gangway\ngangsaw\ngangway\ngantlet\ngantry, gauntry\ngarage\ngarage, service_department\nGarand_rifle, Garand, M-1, M-1_rifle\ngarbage\ngarbage_truck, dustcart\ngarboard, garboard_plank, garboard_strake\ngarden\ngarden\ngarden_rake\ngarden_spade\ngarden_tool, lawn_tool\ngarden_trowel\ngargoyle\ngaribaldi\ngarlic_press\ngarment\ngarment_bag\ngarrison_cap, overseas_cap\ngarrote, garotte, garrotte, iron_collar\ngarter, supporter\ngarter_belt, suspender_belt\ngarter_stitch\ngas_guzzler\ngas_shell\ngas_bracket\ngas_burner, gas_jet\ngas-cooled_reactor\ngas-discharge_tube\ngas_engine\ngas_fixture\ngas_furnace\ngas_gun\ngas_heater\ngas_holder, gasometer\ngasket\ngas_lamp\ngas_maser\ngasmask, respirator, gas_helmet\ngas_meter, 
gasometer\ngasoline_engine, petrol_engine\ngasoline_gauge, gasoline_gage, gas_gauge, gas_gage, petrol_gauge, petrol_gage\ngas_oven\ngas_oven\ngas_pump, gasoline_pump, petrol_pump, island_dispenser\ngas_range, gas_stove, gas_cooker\ngas_ring\ngas_tank, gasoline_tank, petrol_tank\ngas_thermometer, air_thermometer\ngastroscope\ngas_turbine\ngas-turbine_ship\ngat, rod\ngate\ngatehouse\ngateleg_table\ngatepost\ngathered_skirt\nGatling_gun\ngauge, gage\ngauntlet, gantlet\ngauntlet, gantlet, metal_glove\ngauze, netting, veiling\ngauze, gauze_bandage\ngavel\ngazebo, summerhouse\ngear, gear_wheel, geared_wheel, cogwheel\ngear, paraphernalia, appurtenance\ngear, gear_mechanism\ngearbox, gear_box, gear_case\ngearing, gear, geartrain, power_train, train\ngearset\ngearshift, gearstick, shifter, gear_lever\nGeiger_counter, Geiger-Muller_counter\nGeiger_tube, Geiger-Muller_tube\ngene_chip, DNA_chip\ngeneral-purpose_bomb, GP_bomb\ngenerator\ngenerator\ngenerator\nGeneva_gown\ngeodesic_dome\ngeorgette\ngharry\nghat\nghetto_blaster, boom_box\ngift_shop, novelty_shop\ngift_wrapping\ngig\ngig\ngig\ngig\ngildhall\ngill_net\ngilt, gilding\ngimbal\ngingham\ngirandole, girandola\ngirder\ngirdle, cincture, sash, waistband, waistcloth\nglass, drinking_glass\nglass\nglass_cutter\nglasses_case\nglebe_house\nGlengarry\nglider, sailplane\nGlobal_Positioning_System, GPS\nglockenspiel, orchestral_bells\nglory_hole, lazaretto\nglove\nglove_compartment\nglow_lamp\nglow_tube\nglyptic_art, glyptography\nglyptics, lithoglyptics\ngnomon\ngoal\ngoalmouth\ngoalpost\ngoblet\ngodown\ngoggles\ngo-kart\ngold_plate\ngolf_bag\ngolf_ball\ngolfcart, golf_cart\ngolf_club, golf-club, club\ngolf-club_head, club_head, club-head, clubhead\ngolf_equipment\ngolf_glove\ngolliwog, golliwogg\ngondola\ngong, tam-tam\ngoniometer\nGordian_knot\ngorget\ngossamer\nGothic_arch\ngouache\ngouge\ngourd, calabash\ngovernment_building\ngovernment_office\ngown\ngown, robe\ngown, surgical_gown, 
scrubs\ngrab\ngrab_bag\ngrab_bar\ngrace_cup\ngrade_separation\ngraduated_cylinder\ngraffito, graffiti\ngramophone, acoustic_gramophone\ngranary, garner\ngrandfather_clock, longcase_clock\ngrand_piano, grand\ngraniteware\ngranny_knot, granny\ngrape_arbor, grape_arbour\ngrapnel, grapnel_anchor\ngrapnel, grapple, grappler, grappling_hook, grappling_iron\ngrass_skirt\ngrate, grating\ngrate, grating\ngrater\ngraver, graving_tool, pointel, pointrel\ngravestone, headstone, tombstone\ngravimeter, gravity_meter\ngravure, photogravure, heliogravure\ngravy_boat, gravy_holder, sauceboat, boat\ngrey, gray\ngrease-gun, gun\ngreasepaint\ngreasy_spoon\ngreatcoat, overcoat, topcoat\ngreat_hall\ngreave, jambeau\ngreengrocery\ngreenhouse, nursery, glasshouse\ngrenade\ngrid, gridiron\ngriddle\ngrill, grille, grillwork\ngrille, radiator_grille\ngrillroom, grill\ngrinder\ngrinding_wheel, emery_wheel\ngrindstone\ngripsack\ngristmill\ngrocery_bag\ngrocery_store, grocery, food_market, market\ngrogram\ngroined_vault\ngroover\ngrosgrain\ngros_point\nground, earth\nground_bait\nground_control\nground_floor, first_floor, ground_level\ngroundsheet, ground_cloth\nG-string, thong\nguard, safety, safety_device\nguard_boat\nguardroom\nguardroom\nguard_ship\nguard's_van\ngueridon\nGuarnerius\nguesthouse\nguestroom\nguidance_system, guidance_device\nguided_missile\nguided_missile_cruiser\nguided_missile_frigate\nguildhall\nguilloche\nguillotine\nguimpe\nguimpe\nguitar\nguitar_pick\ngulag\ngun\ngunboat\ngun_carriage\ngun_case\ngun_emplacement, weapons_emplacement\ngun_enclosure, gun_turret, turret\ngunlock, firing_mechanism\ngunnery\ngunnysack, gunny_sack, burlap_bag\ngun_pendulum\ngun_room\ngunsight, gun-sight\ngun_trigger, trigger\ngurney\ngusher\ngusset, inset\ngusset, gusset_plate\nguy, guy_cable, guy_wire, guy_rope\ngymnastic_apparatus, exerciser\ngym_shoe, sneaker, tennis_shoe\ngym_suit\ngymslip\ngypsy_cab\ngyrocompass\ngyroscope, gyro\ngyrostabilizer, gyrostabiliser\nhabergeon\nhabit\nhabit, 
riding_habit\nhacienda\nhacksaw, hack_saw, metal_saw\nhaft, helve\nhairbrush\nhaircloth, hair\nhairdressing, hair_tonic, hair_oil, hair_grease\nhairnet\nhairpiece, false_hair, postiche\nhairpin\nhair_shirt\nhair_slide\nhair_spray\nhairspring\nhair_trigger\nhalberd\nhalf_binding\nhalf_hatchet\nhalf_hitch\nhalf_track\nhall\nhall\nhall\nHall_of_Fame\nhall_of_residence\nhallstand\nhalter\nhalter, hackamore\nhame\nhammer\nhammer, power_hammer\nhammer\nhammerhead\nhammock, sack\nhamper\nhand\nhandball\nhandbarrow\nhandbell\nhand_blower, blow_dryer, blow_drier, hair_dryer, hair_drier\nhandbow\nhand_brake, emergency, emergency_brake, parking_brake\nhand_calculator, pocket_calculator\nhandcar\nhandcart, pushcart, cart, go-cart\nhand_cream\nhandcuff, cuff, handlock, manacle\nhand_drill, handheld_drill\nhand_glass, simple_microscope, magnifying_glass\nhand_glass, hand_mirror\nhand_grenade\nhand-held_computer, hand-held_microcomputer\nhandhold\nhandkerchief, hankie, hanky, hankey\nhandlebar\nhandloom\nhand_lotion\nhand_luggage\nhand-me-down\nhand_mower\nhand_pump\nhandrest\nhandsaw, hand_saw, carpenter's_saw\nhandset, French_telephone\nhand_shovel\nhandspike\nhandstamp, rubber_stamp\nhand_throttle\nhand_tool\nhand_towel, face_towel\nhand_truck, truck\nhandwear, hand_wear\nhandwheel\nhandwheel\nhangar_queen\nhanger\nhang_glider\nhangman's_rope, hangman's_halter, halter, hemp, hempen_necktie\nhank\nhansom, hansom_cab\nharbor, harbour\nhard_disc, hard_disk, fixed_disk\nhard_hat, tin_hat, safety_hat\nhardtop\nhardware, ironware\nhardware_store, ironmonger, ironmonger's_shop\nharmonica, mouth_organ, harp, mouth_harp\nharmonium, organ, reed_organ\nharness\nharness\nharp\nharp\nharpoon\nharpoon_gun\nharpoon_log\nharpsichord, cembalo\nHarris_Tweed\nharrow\nharvester, reaper\nhash_house\nhasp\nhat, chapeau, lid\nhatbox\nhatch\nhatchback, hatchback_door\nhatchback\nhatchel, heckle\nhatchet\nhatpin\nhauberk, byrnie\nHawaiian_guitar, steel_guitar\nhawse, hawsehole, 
hawsepipe\nhawser\nhawser_bend\nhay_bale\nhayfork\nhayloft, haymow, mow\nhaymaker, hay_conditioner\nhayrack, hayrig\nhayrack\nhazard\nhead\nhead\nhead\nheadboard\nhead_covering, veil\nheaddress, headgear\nheader\nheader\nheader, coping, cope\nheader, lintel\nheadfast\nhead_gasket\nhead_gate\nheadgear\nheadlight, headlamp\nheadpiece\nheadpin, kingpin\nheadquarters, central_office, main_office, home_office, home_base\nheadrace\nheadrest\nheadsail\nheadscarf\nheadset\nhead_shop\nheadstall, headpiece\nheadstock\nhealth_spa, spa, health_club\nhearing_aid, ear_trumpet\nhearing_aid, deaf-aid\nhearse\nhearth, fireside\nhearthrug\nheart-lung_machine\nheat_engine\nheater, warmer\nheat_exchanger\nheating_pad, hot_pad\nheat_lamp, infrared_lamp\nheat_pump\nheat-seeking_missile\nheat_shield\nheat_sink\nheaume\nheaver\nheavier-than-air_craft\nheckelphone, basset_oboe\nhectograph, heliotype\nhedge, hedgerow\nhedge_trimmer\nhelicon, bombardon\nhelicopter, chopper, whirlybird, eggbeater\nheliograph\nheliometer\nhelm\nhelmet\nhelmet\nhematocrit, haematocrit\nhemming-stitch\nhemostat, haemostat\nhemstitch, hemstitching\nhenroost\nheraldry\nhermitage\nherringbone\nherringbone, herringbone_pattern\nHerschelian_telescope, off-axis_reflector\nHessian_boot, hessian, jackboot, Wellington, Wellington_boot\nheterodyne_receiver, superheterodyne_receiver, superhet\nhibachi\nhideaway, retreat\nhi-fi, high_fidelity_sound_system\nhigh_altar\nhigh-angle_gun\nhighball_glass\nhighboard\nhighboy, tallboy\nhighchair, feeding_chair\nhigh_gear, high\nhigh-hat_cymbal, high_hat\nhighlighter\nhighlighter\nhigh-pass_filter\nhigh-rise, tower_block\nhigh_table\nhigh-warp_loom\nhijab\nhinge, flexible_joint\nhinging_post, swinging_post\nhip_boot, thigh_boot\nhipflask, pocket_flask\nhip_pad\nhip_pocket\nhippodrome\nhip_roof, hipped_roof\nhitch\nhitch\nhitching_post\nhitchrack, hitching_bar\nhob\nhobble_skirt\nhockey_skate\nhockey_stick\nhod\nhodoscope\nhoe\nhoe_handle\nhogshead\nhoist\nhold, 
keep\nholder\nholding_cell\nholding_device\nholding_pen, holding_paddock, holding_yard\nhollowware, holloware\nholster\nholster\nholy_of_holies, sanctum_sanctorum\nhome, nursing_home, rest_home\nhome_appliance, household_appliance\nhome_computer\nhome_plate, home_base, home, plate\nhome_room, homeroom\nhomespun\nhomestead\nhome_theater, home_theatre\nhoming_torpedo\nhone\nhoneycomb\nhood, bonnet, cowl, cowling\nhood\nhood\nhood, exhaust_hood\nhood\nhood_latch\nhook\nhook, claw\nhook\nhookah, narghile, nargileh, sheesha, shisha, chicha, calean, kalian, water_pipe, hubble-bubble, hubbly-bubbly\nhook_and_eye\nhookup, assemblage\nhookup\nhook_wrench, hook_spanner\nhoopskirt, crinoline\nhoosegow, hoosgow\nHoover\nhope_chest, wedding_chest\nhopper\nhopsacking, hopsack\nhorizontal_bar, high_bar\nhorizontal_stabilizer, horizontal_stabiliser, tailplane\nhorizontal_tail\nhorn\nhorn\nhorn\nhorn_button\nhornpipe, pibgorn, stockhorn\nhorse, gymnastic_horse\nhorsebox\nhorsecar\nhorse_cart, horse-cart\nhorsecloth\nhorse-drawn_vehicle\nhorsehair\nhorsehair_wig\nhorseless_carriage\nhorse_pistol, horse-pistol\nhorseshoe, shoe\nhorseshoe\nhorse-trail\nhorsewhip\nhose\nhosiery, hose\nhospice\nhospital, infirmary\nhospital_bed\nhospital_room\nhospital_ship\nhospital_train\nhostel, youth_hostel, student_lodging\nhostel, hostelry, inn, lodge, auberge\nhot-air_balloon\nhotel\nhotel-casino, casino-hotel\nhotel-casino, casino-hotel\nhotel_room\nhot_line\nhot_pants\nhot_plate, hotplate\nhot_rod, hot-rod\nhot_spot, hotspot\nhot_tub\nhot-water_bottle, hot-water_bag\nhoundstooth_check, hound's-tooth_check, dogstooth_check, dogs-tooth_check, dog's-tooth_check\nhourglass\nhour_hand, little_hand\nhouse\nhouse\nhouseboat\nhouselights\nhouse_of_cards, cardhouse, card-house, cardcastle\nhouse_of_correction\nhouse_paint, housepaint\nhousetop\nhousing, lodging, living_accommodations\nhovel, hut, hutch, shack, shanty\nhovercraft, ground-effect_machine\nhowdah, houdah\nhuarache, huaraches\nhub-and-spoke, 
hub-and-spoke_system\nhubcap\nhuck, huckaback\nhug-me-tight\nhula-hoop\nhulk\nhull\nhumeral_veil, veil\nHumvee, Hum-Vee\nhunter, hunting_watch\nhunting_knife\nhurdle\nhurricane_deck, hurricane_roof, promenade_deck, awning_deck\nhurricane_lamp, hurricane_lantern, tornado_lantern, storm_lantern, storm_lamp\nhut, army_hut, field_hut\nhutch\nhutment\nhydraulic_brake, hydraulic_brakes\nhydraulic_press\nhydraulic_pump, hydraulic_ram\nhydraulic_system\nhydraulic_transmission, hydraulic_transmission_system\nhydroelectric_turbine\nhydrofoil, hydroplane\nhydrofoil, foil\nhydrogen_bomb, H-bomb, fusion_bomb, thermonuclear_bomb\nhydrometer, gravimeter\nhygrodeik\nhygrometer\nhygroscope\nhyperbaric_chamber\nhypercoaster\nhypermarket\nhypodermic_needle\nhypodermic_syringe, hypodermic, hypo\nhypsometer\nhysterosalpingogram\nI-beam\nice_ax, ice_axe, piolet\niceboat, ice_yacht, scooter\nicebreaker, iceboat\niced-tea_spoon\nice_hockey_rink, ice-hockey_rink\nice_machine\nice_maker\nice_pack, ice_bag\nicepick, ice_pick\nice_rink, ice-skating_rink, ice\nice_skate\nice_tongs\nicetray\niconoscope\nIdentikit, Identikit_picture\nidle_pulley, idler_pulley, idle_wheel\nigloo, iglu\nignition_coil\nignition_key\nignition_switch\nimaret\nimmovable_bandage\nimpact_printer\nimpeller\nimplant\nimplement\nimpression\nimprint\nimprovised_explosive_device, I.E.D., IED\nimpulse_turbine\nin-basket, in-tray\nincendiary_bomb, incendiary, firebomb\nincinerator\ninclined_plane\ninclinometer, dip_circle\ninclinometer\nincrustation, encrustation\nincubator, brooder\nindex_register\nIndiaman\nIndian_club\nindicator\ninduction_coil\ninductor, inductance\nindustrial_watercourse\ninertial_guidance_system, inertial_navigation_system\ninflater, inflator\ninhaler, inhalator\ninjector\nink_bottle, inkpot\nink_eraser\nink-jet_printer\ninkle\ninkstand\ninkwell, inkstand\ninlay\ninside_caliper\ninsole, innersole\ninstep\ninstillator\ninstitution\ninstrument\ninstrument_of_punishment\ninstrument_of_torture\nintaglio, 
diaglyph\nintake_valve\nintegrated_circuit, microcircuit\nintegrator, planimeter\nIntelnet\ninterceptor\ninterchange\nintercommunication_system, intercom\nintercontinental_ballistic_missile, ICBM\ninterface, port\ninterferometer\ninterior_door\ninternal-combustion_engine, ICE\ninternal_drive\ninternet, net, cyberspace\ninterphone\ninterrupter\nintersection, crossroad, crossway, crossing, carrefour\ninterstice\nintraocular_lens\nintravenous_pyelogram, IVP\ninverter\nion_engine\nionization_chamber, ionization_tube\niPod\nvideo_iPod\niron, smoothing_iron\niron\niron, branding_iron\nirons, chains\nironclad\niron_foundry\niron_horse\nironing\niron_lung\nironmongery\nironworks\nirrigation_ditch\nizar\njabot\njack\njack, jackstones\njack\njack\njacket\njacket\njacket\njack-in-the-box\njack-o'-lantern\njack_plane\nJacob's_ladder, jack_ladder, pilot_ladder\njaconet\nJacquard_loom, Jacquard\njacquard\njag, dag\njail, jailhouse, gaol, clink, slammer, poky, pokey\njalousie\njamb\njammer\njampot, jamjar\njapan\njar\nJarvik_heart, Jarvik_artificial_heart\njaunting_car, jaunty_car\njavelin\njaw\nJaws_of_Life\njean, blue_jean, denim\njeep, landrover\njellaba\njerkin\njeroboam, double-magnum\njersey\njersey, T-shirt, tee_shirt\njet, jet_plane, jet-propelled_plane\njet_bridge\njet_engine\njetliner\njeweler's_glass\njewelled_headdress, jeweled_headdress\njew's_harp, jews'_harp, mouth_bow\njib\njibboom\njig\njig\njiggermast, jigger\njigsaw, scroll_saw, fretsaw\njigsaw_puzzle\njinrikisha, ricksha, rickshaw\njobcentre\njodhpurs, jodhpur_breeches, riding_breeches\njodhpur, jodhpur_boot, jodhpur_shoe\njoinery\njoint\nJoint_Direct_Attack_Munition, JDAM\njointer, jointer_plane, jointing_plane, long_plane\njoist\njolly_boat, jolly\njorum\njoss_house\njournal_bearing\njournal_box\njoystick\njungle_gym\njunk\njug\njukebox, nickelodeon\njumbojet, jumbo_jet\njumper, pinafore, pinny\njumper\njumper\njumper\njumper_cable, jumper_lead, lead, booster_cable\njump_seat\njump_suit\njump_suit, 
jumpsuit\njunction\njunction, conjunction\njunction_barrier, barrier_strip\njunk_shop\njury_box\njury_mast\nkachina\nkaffiyeh\nkalansuwa\nKalashnikov\nkameez\nkanzu\nkatharometer\nkayak\nkazoo\nkeel\nkeelboat\nkeelson\nkeep, donjon, dungeon\nkeg\nkennel, doghouse, dog_house\nkepi, peaked_cap, service_cap, yachting_cap\nkeratoscope\nkerchief\nketch\nkettle, boiler\nkettle, kettledrum, tympanum, tympani, timpani\nkey\nkey\nkeyboard\nkeyboard_buffer\nkeyboard_instrument\nkeyhole\nkeyhole_saw\nkhadi, khaddar\nkhaki\nkhakis\nkhimar\nkhukuri\nkick_pleat\nkicksorter, pulse_height_analyzer\nkickstand\nkick_starter, kick_start\nkid_glove, suede_glove\nkiln\nkilt\nkimono\nkinescope, picture_tube, television_tube\nKinetoscope\nking\nking\nkingbolt, kingpin, swivel_pin\nking_post\nKipp's_apparatus\nkirk\nkirpan\nkirtle\nkirtle\nkit, outfit\nkit\nkitbag, kit_bag\nkitchen\nkitchen_appliance\nkitchenette\nkitchen_table\nkitchen_utensil\nkitchenware\nkite_balloon\nklaxon, claxon\nklieg_light\nklystron\nknee_brace\nknee-high, knee-hi\nknee_pad\nknee_piece\nknife\nknife\nknife_blade\nknight, horse\nknit\nknitting_machine\nknitting_needle\nknitwear\nknob, boss\nknob, pommel\nknobble\nknobkerrie, knobkerry\nknocker, doorknocker, rapper\nknot\nknuckle_joint, hinge_joint\nkohl\nkoto\nkraal\nkremlin\nkris, creese, crease\nkrummhorn, crumhorn, cromorne\nKundt's_tube\nKurdistan\nkurta\nkylix, cylix\nkymograph, cymograph\nlab_bench, laboratory_bench\nlab_coat, laboratory_coat\nlace\nlacquer\nlacquerware\nlacrosse_ball\nladder-back\nladder-back, ladder-back_chair\nladder_truck, aerial_ladder_truck\nladies'_room, powder_room\nladle\nlady_chapel\nlagerphone\nlag_screw, lag_bolt\nlake_dwelling, pile_dwelling\nlally, lally_column\nlamasery\nlambrequin\nlame\nlaminar_flow_clean_room\nlaminate\nlamination\nlamp\nlamp\nlamp_house, lamphouse, lamp_housing\nlamppost\nlampshade, lamp_shade\nlanai\nlancet_arch, 
lancet\nlancet_window\nlandau\nlander\nlanding_craft\nlanding_flap\nlanding_gear\nlanding_net\nlanding_skid\nland_line, landline\nland_mine, ground-emplaced_mine, booby_trap\nland_office\nlanolin\nlantern\nlanyard, laniard\nlap, lap_covering\nlaparoscope\nlapboard\nlapel\nlap_joint, splice\nlaptop, laptop_computer\nlaryngoscope\nlaser, optical_maser\nlaser-guided_bomb, LGB\nlaser_printer\nlash, thong\nlashing\nlasso, lariat, riata, reata\nlatch\nlatch, door_latch\nlatchet\nlatchkey\nlateen, lateen_sail\nlatex_paint, latex, rubber-base_paint\nlath\nlathe\nlatrine\nlattice, latticework, fretwork\nlaunch\nlauncher, rocket_launcher\nlaundry, wash, washing, washables\nlaundry_cart\nlaundry_truck\nlavalava\nlavaliere, lavalier, lavalliere\nlaver\nlawn_chair, garden_chair\nlawn_furniture\nlawn_mower, mower\nlayette\nlead-acid_battery, lead-acid_accumulator\nlead-in\nleading_rein\nlead_pencil\nleaf_spring\nlean-to\nlean-to_tent\nleash, tether, lead\nleatherette, imitation_leather\nleather_strip\nLeclanche_cell\nlectern, reading_desk\nlecture_room\nlederhosen\nledger_board\nleg\nleg\nlegging, leging, leg_covering\nLeiden_jar, Leyden_jar\nleisure_wear\nlens, lense, lens_system\nlens, electron_lens\nlens_cap, lens_cover\nlens_implant, interocular_lens_implant, IOL\nleotard, unitard, body_suit, cat_suit\nletter_case\nletter_opener, paper_knife, paperknife\nlevee\nlevel, spirit_level\nlever\nlever, lever_tumbler\nlever\nlever_lock\nLevi's, levis\nLiberty_ship\nlibrary\nlibrary\nlid\nLiebig_condenser\nlie_detector\nlifeboat\nlife_buoy, lifesaver, life_belt, life_ring\nlife_jacket, life_vest, cork_jacket\nlife_office\nlife_preserver, preserver, flotation_device\nlife-support_system, life_support\nlife-support_system, life_support\nlifting_device\nlift_pump\nligament\nligature\nlight, light_source\nlight_arm\nlight_bulb, lightbulb, bulb, incandescent_lamp, electric_light, electric-light_bulb\nlight_circuit, lighting_circuit\nlight-emitting_diode, LED\nlighter, light, igniter, 
ignitor\nlighter-than-air_craft\nlight_filter, diffusing_screen\nlighting\nlight_machine_gun\nlight_meter, exposure_meter, photometer\nlight_microscope\nlightning_rod, lightning_conductor\nlight_pen, electronic_stylus\nlightship\nLilo\nlimber\nlimekiln\nlimiter, clipper\nlimousine, limo\nlinear_accelerator, linac\nlinen\nline_printer, line-at-a-time_printer\nliner, ocean_liner\nliner, lining\nlingerie, intimate_apparel\nlining, liner\nlink, data_link\nlinkage\nLink_trainer\nlinocut\nlinoleum_knife, linoleum_cutter\nLinotype, Linotype_machine\nlinsey-woolsey\nlinstock\nlion-jaw_forceps\nlip-gloss\nlipstick, lip_rouge\nliqueur_glass\nliquid_crystal_display, LCD\nliquid_metal_reactor\nlisle\nlister, lister_plow, lister_plough, middlebreaker, middle_buster\nlitterbin, litter_basket, litter-basket\nlittle_theater, little_theatre\nlive_axle, driving_axle\nliving_quarters, quarters\nliving_room, living-room, sitting_room, front_room, parlor, parlour\nload\nLoafer\nloaner\nlobe\nlobster_pot\nlocal\nlocal_area_network, LAN\nlocal_oscillator, heterodyne_oscillator\nLochaber_ax\nlock\nlock, ignition_lock\nlock, lock_chamber\nlock\nlockage\nlocker\nlocker_room\nlocket\nlock-gate\nlocking_pliers\nlockring, lock_ring, lock_washer\nlockstitch\nlockup\nlocomotive, engine, locomotive_engine, railway_locomotive\nlodge, indian_lodge\nlodge, hunting_lodge\nlodge\nlodging_house, rooming_house\nloft, attic, garret\nloft, pigeon_loft\nloft\nlog_cabin\nloggia\nlongbow\nlong_iron\nlong_johns\nlong_sleeve\nlong_tom\nlong_trousers, long_pants\nlong_underwear, union_suit\nlooking_glass, glass\nlookout, observation_tower, lookout_station, observatory\nloom\nloop_knot\nlorgnette\nLorraine_cross, cross_of_Lorraine\nlorry, camion\nlota\nlotion\nloudspeaker, speaker, speaker_unit, loudspeaker_system, speaker_system\nlounge, waiting_room, waiting_area\nlounger\nlounging_jacket, smoking_jacket\nlounging_pajama, lounging_pyjama\nloungewear\nloupe, jeweler's_loupe\nlouvered_window, 
jalousie\nlove_knot, lovers'_knot, lover's_knot, true_lovers'_knot, true_lover's_knot\nlove_seat, loveseat, tete-a-tete, vis-a-vis\nloving_cup\nlowboy\nlow-pass_filter\nlow-warp-loom\nLP, L-P\nL-plate\nlubber's_hole\nlubricating_system, force-feed_lubricating_system, force_feed, pressure-feed_lubricating_system, pressure_feed\nluff\nlug\nluge\nLuger\nluggage_carrier\nluggage_compartment, automobile_trunk, trunk\nluggage_rack, roof_rack\nlugger\nlugsail, lug\nlug_wrench\nlumberjack, lumber_jacket\nlumbermill, sawmill\nlunar_excursion_module, lunar_module, LEM\nlunchroom\nlunette\nlungi, lungyi, longyi\nlunula\nlusterware\nlute\nluxury_liner, express_luxury_liner\nlyceum\nlychgate, lichgate\nlyre\nmachete, matchet, panga\nmachicolation\nmachine\nmachine, simple_machine\nmachine_bolt\nmachine_gun\nmachinery\nmachine_screw\nmachine_tool\nmachinist's_vise, metalworking_vise\nmachmeter\nmackinaw\nmackinaw, Mackinaw_boat\nmackinaw, Mackinaw_coat\nmackintosh, macintosh\nmacrame\nmadras\nMae_West, air_jacket\nmagazine_rack\nmagic_lantern\nmagnet\nmagnetic_bottle\nmagnetic_compass\nmagnetic_core_memory, core_memory\nmagnetic_disk, magnetic_disc, disk, disc\nmagnetic_head\nmagnetic_mine\nmagnetic_needle\nmagnetic_recorder\nmagnetic_stripe\nmagnetic_tape, mag_tape, tape\nmagneto, magnetoelectric_machine\nmagnetometer, gaussmeter\nmagnetron\nmagnifier\nmagnum\nmagnus_hitch\nmail\nmailbag, postbag\nmailbag, mail_pouch\nmailboat, mail_boat, packet, packet_boat\nmailbox, letter_box\nmail_car\nmaildrop\nmailer\nmaillot\nmaillot, tank_suit\nmailsorter\nmail_train\nmainframe, mainframe_computer\nmainmast\nmain_rotor\nmainsail\nmainspring\nmain-topmast\nmain-topsail\nmain_yard\nmaisonette, maisonnette\nmajolica, maiolica\nmakeup, make-up, war_paint\nMaksutov_telescope\nmalacca, malacca_cane\nmallet, beetle\nmallet, hammer\nmallet\nmammogram\nmandola\nmandolin\nmanger, trough\nmangle\nmanhole\nmanhole_cover\nman-of-war, ship_of_the_line\nmanometer\nmanor, manor_house\nmanor_hall, 
hall\nMANPAD\nmansard, mansard_roof\nmanse\nmansion, mansion_house, manse, hall, residence\nmantel, mantelpiece, mantle, mantlepiece, chimneypiece\nmantelet, mantilla\nmantilla\nMao_jacket\nmap\nmaquiladora\nmaraca\nmarble\nmarching_order\nmarimba, xylophone\nmarina\nmarker\nmarketplace, market_place, mart, market\nmarlinespike, marlinspike, marlingspike\nmarocain, crepe_marocain\nmarquee, marquise\nmarquetry, marqueterie\nmarriage_bed\nmartello_tower\nmartingale\nmascara\nmaser\nmasher\nmashie, five_iron\nmashie_niblick, seven_iron\nmasjid, musjid\nmask\nmask\nMasonite\nMason_jar\nmasonry\nmason's_level\nmassage_parlor\nmassage_parlor\nmass_spectrograph\nmass_spectrometer, spectrometer\nmast\nmast\nmastaba, mastabah\nmaster_bedroom\nmasterpiece, chef-d'oeuvre\nmat\nmat, gym_mat\nmatch, lucifer, friction_match\nmatch\nmatchboard\nmatchbook\nmatchbox\nmatchlock\nmatch_plane, tonguing_and_grooving_plane\nmatchstick\nmaterial\nmateriel, equipage\nmaternity_hospital\nmaternity_ward\nmatrix\nMatthew_Walker, Matthew_Walker_knot\nmatting\nmattock\nmattress_cover\nmaul, sledge, sledgehammer\nmaulstick, mahlstick\nMauser\nmausoleum\nmaxi\nMaxim_gun\nmaximum_and_minimum_thermometer\nmaypole\nmaze, labyrinth\nmazer\nmeans\nmeasure\nmeasuring_cup\nmeasuring_instrument, measuring_system, measuring_device\nmeasuring_stick, measure, measuring_rod\nmeat_counter\nmeat_grinder\nmeat_hook\nmeat_house\nmeat_safe\nmeat_thermometer\nmechanical_device\nmechanical_piano, Pianola, player_piano\nmechanical_system\nmechanism\nmedical_building, health_facility, healthcare_facility\nmedical_instrument\nmedicine_ball\nmedicine_chest, medicine_cabinet\nMEDLINE\nmegalith, megalithic_structure\nmegaphone\nmemorial, monument\nmemory, computer_memory, storage, computer_storage, store, memory_board\nmemory_chip\nmemory_device, storage_device\nmenagerie, zoo, zoological_garden\nmending\nmenhir, standing_stone\nmenorah\nMenorah\nman's_clothing\nmen's_room, men's\nmercantile_establishment, retail_store, 
sales_outlet, outlet\nmercury_barometer\nmercury_cell\nmercury_thermometer, mercury-in-glass_thermometer\nmercury-vapor_lamp\nmercy_seat\nmerlon\nmess, mess_hall\nmess_jacket, monkey_jacket, shell_jacket\nmess_kit\nmessuage\nmetal_detector\nmetallic\nmetal_screw\nmetal_wood\nmeteorological_balloon\nmeter\nmeterstick, metrestick\nmetronome\nmezzanine, mezzanine_floor, entresol\nmezzanine, first_balcony\nmicrobalance\nmicrobrewery\nmicrofiche\nmicrofilm\nmicrometer, micrometer_gauge, micrometer_caliper\nmicrophone, mike\nmicroprocessor\nmicroscope\nmicrotome\nmicrowave, microwave_oven\nmicrowave_diathermy_machine\nmicrowave_linear_accelerator\nmiddy, middy_blouse\nmidiron, two_iron\nmihrab\nmihrab\nmilitary_hospital\nmilitary_quarters\nmilitary_uniform\nmilitary_vehicle\nmilk_bar\nmilk_can\nmilk_float\nmilking_machine\nmilking_stool\nmilk_wagon, milkwagon\nmill, grinder, milling_machinery\nmilldam\nmiller, milling_machine\nmilliammeter\nmillinery, woman's_hat\nmillinery, hat_shop\nmilling\nmillivoltmeter\nmillstone\nmillstone\nmillwheel, mill_wheel\nmimeograph, mimeo, mimeograph_machine, Roneo, Roneograph\nminaret\nmincer, mincing_machine\nmine\nmine_detector\nminelayer\nmineshaft\nminibar, cellaret\nminibike, motorbike\nminibus\nminicar\nminicomputer\nministry\nminiskirt, mini\nminisub, minisubmarine\nminivan\nminiver\nmink, mink_coat\nminster\nmint\nminute_hand, big_hand\nMinuteman\nmirror\nmissile\nmissile_defense_system, missile_defence_system\nmiter_box, mitre_box\nmiter_joint, mitre_joint, miter, mitre\nmitten\nmixer\nmixer\nmixing_bowl\nmixing_faucet\nmizzen, mizen\nmizzenmast, mizenmast, mizzen, mizen\nmobcap\nmobile_home, manufactured_home\nmoccasin, mocassin\nmock-up\nmod_con\nModel_T\nmodem\nmodillion\nmodule\nmodule\nmohair\nmoire, watered-silk\nmold, mould, cast\nmoldboard, mouldboard\nmoldboard_plow, mouldboard_plough\nmoleskin\nMolotov_cocktail, petrol_bomb, gasoline_bomb\nmonastery\nmonastic_habit\nmoneybag\nmoney_belt\nmonitor\nmonitor\nmonitor, 
monitoring_device\nmonkey-wrench, monkey_wrench\nmonk's_cloth\nmonochrome\nmonocle, eyeglass\nmonofocal_lens_implant, monofocal_IOL\nmonoplane\nmonotype\nmonstrance, ostensorium\nmooring_tower, mooring_mast\nMoorish_arch, horseshoe_arch\nmoped\nmop_handle\nmoquette\nmorgue, mortuary, dead_room\nmorion, cabasset\nmorning_dress\nmorning_dress\nmorning_room\nMorris_chair\nmortar, howitzer, trench_mortar\nmortar\nmortarboard\nmortise_joint, mortise-and-tenon_joint\nmosaic\nmosque\nmosquito_net\nmotel\nmotel_room\nMother_Hubbard, muumuu\nmotion-picture_camera, movie_camera, cine-camera\nmotion-picture_film, movie_film, cine-film\nmotley\nmotley\nmotor\nmotorboat, powerboat\nmotorcycle, bike\nmotor_hotel, motor_inn, motor_lodge, tourist_court, court\nmotorized_wheelchair\nmotor_scooter, scooter\nmotor_vehicle, automotive_vehicle\nmound, hill\nmound, hill, pitcher's_mound\nmount, setting\nmountain_bike, all-terrain_bike, off-roader\nmountain_tent\nmouse, computer_mouse\nmouse_button\nmousetrap\nmousse, hair_mousse, hair_gel\nmouthpiece, embouchure\nmouthpiece\nmouthpiece, gumshield\nmovement\nmovie_projector, cine_projector, film_projector\nmoving-coil_galvanometer\nmoving_van\nmud_brick\nmudguard, splash_guard, splash-guard\nmudhif\nmuff\nmuffle\nmuffler\nmufti\nmug\nmulch\nmule, scuff\nmultichannel_recorder\nmultiengine_airplane, multiengine_plane\nmultiplex\nmultiplexer\nmultiprocessor\nmultistage_rocket, step_rocket\nmunition, ordnance, ordnance_store\nMurphy_bed\nmusette, shepherd's_pipe\nmusette_pipe\nmuseum\nmushroom_anchor\nmusical_instrument, instrument\nmusic_box, musical_box\nmusic_hall, vaudeville_theater, vaudeville_theatre\nmusic_school\nmusic_stand, music_rack\nmusic_stool, piano_stool\nmusket\nmusket_ball, ball\nmuslin\nmustache_cup, moustache_cup\nmustard_plaster, sinapism\nmute\nmuzzle_loader\nmuzzle\nmyelogram\nnacelle\nnail\nnailbrush\nnailfile\nnailhead\nnailhead\nnail_polish, nail_enamel, nail_varnish\nnainsook\nNapier's_bones, Napier's_rods\nnard, 
spikenard\nnarrowbody_aircraft, narrow-body_aircraft, narrow-body\nnarrow_wale\nnarthex\nnarthex\nnasotracheal_tube\nnational_monument\nnautilus, nuclear_submarine, nuclear-powered_submarine\nnavigational_system\nnaval_equipment\nnaval_gun\nnaval_missile\nnaval_radar\nnaval_tactical_data_system\nnaval_weaponry\nnave\nnavigational_instrument\nnebuchadnezzar\nneckband\nneck_brace\nneckcloth, stock\nneckerchief\nnecklace\nnecklet\nneckline\nneckpiece\nnecktie, tie\nneckwear\nneedle\nneedle\nneedlenose_pliers\nneedlework, needlecraft\nnegative\nnegative_magnetic_pole, negative_pole, south-seeking_pole\nnegative_pole\nnegligee, neglige, peignoir, wrapper, housecoat\nneolith\nneon_lamp, neon_induction_lamp, neon_tube\nnephoscope\nnest\nnest_egg\nnet, network, mesh, meshing, meshwork\nnet\nnet\nnet\nnetwork, electronic_network\nnetwork\nneutron_bomb\nnewel\nnewel_post, newel\nnewspaper, paper\nnewsroom\nnewsroom\nnewsstand\nNewtonian_telescope, Newtonian_reflector\nnib, pen_nib\nniblick, nine_iron\nnicad, nickel-cadmium_accumulator\nnickel-iron_battery, nickel-iron_accumulator\nNicol_prism\nnight_bell\nnightcap\nnightgown, gown, nightie, night-robe, nightdress\nnight_latch\nnight-light\nnightshirt\nnightwear, sleepwear, nightclothes\nninepin, skittle, skittle_pin\nninepin_ball, skittle_ball\nninon\nnipple\nnipple_shield\nniqab\nNissen_hut, Quonset_hut\nnogging\nnoisemaker\nnonsmoker, nonsmoking_car\nnon-volatile_storage, nonvolatile_storage\nNorfolk_jacket\nnoria\nnosebag, feedbag\nnoseband, nosepiece\nnose_flute\nnosewheel\nnotebook, notebook_computer\nnuclear-powered_ship\nnuclear_reactor, reactor\nnuclear_rocket\nnuclear_weapon, atomic_weapon\nnude, nude_painting\nnumdah, numdah_rug, nammad\nnun's_habit\nnursery, baby's_room\nnut_and_bolt\nnutcracker\nnylon\nnylons, nylon_stocking, rayons, rayon_stocking, silk_stocking\noar\noast\noast_house\nobelisk\nobject_ball\nobjective, objective_lens, object_lens, object_glass\noblique_bandage\noboe, hautboy, 
hautbois\noboe_da_caccia\noboe_d'amore\nobservation_dome\nobservatory\nobstacle\nobturator\nocarina, sweet_potato\noctant\nodd-leg_caliper\nodometer, hodometer, mileometer, milometer\noeil_de_boeuf\noffice, business_office\noffice_building, office_block\noffice_furniture\nofficer's_mess\noff-line_equipment, auxiliary_equipment\nogee, cyma_reversa\nogee_arch, keel_arch\nohmmeter\noil, oil_color, oil_colour\noilcan\noilcloth\noil_filter\noil_heater, oilstove, kerosene_heater, kerosine_heater\noil_lamp, kerosene_lamp, kerosine_lamp\noil_paint\noil_pump\noil_refinery, petroleum_refinery\noilskin, slicker\noil_slick\noilstone\noil_tanker, oiler, tanker, tank_ship\nold_school_tie\nolive_drab\nolive_drab, olive-drab_uniform\nOlympian_Zeus\nomelet_pan, omelette_pan\nomnidirectional_antenna, nondirectional_antenna\nomnirange, omnidirectional_range, omnidirectional_radio_range\nonion_dome\nopen-air_market, open-air_marketplace, market_square\nopen_circuit\nopen-end_wrench, tappet_wrench\nopener\nopen-hearth_furnace\nopenside_plane, rabbet_plane\nopen_sight\nopenwork\nopera, opera_house\nopera_cloak, opera_hood\noperating_microscope\noperating_room, OR, operating_theater, operating_theatre, surgery\noperating_table\nophthalmoscope\noptical_device\noptical_disk, optical_disc\noptical_instrument\noptical_pyrometer, pyroscope\noptical_telescope\norchestra_pit, pit\nordinary, ordinary_bicycle\norgan, pipe_organ\norgandy, organdie\norganic_light-emitting_diode, OLED\norgan_loft\norgan_pipe, pipe, pipework\norganza\noriel, oriel_window\noriflamme\nO_ring\nOrlon\norlop_deck, orlop, fourth_deck\norphanage, orphans'_asylum\norphrey\norrery\northicon, image_orthicon\northochromatic_film\northopter, ornithopter\northoscope\noscillograph\noscilloscope, scope, cathode-ray_oscilloscope, CRO\nossuary\notoscope, auriscope, auroscope\nottoman, pouf, pouffe, puff, hassock\noubliette\nout-basket, out-tray\noutboard_motor, outboard\noutboard_motorboat, outboard\noutbuilding\nouterwear, 
overclothes\noutfall\noutfit, getup, rig, turnout\noutfitter\nouthouse, privy, earth-closet, jakes\noutput_device\noutrigger\noutrigger_canoe\noutside_caliper\noutside_mirror\noutwork\noven\noven_thermometer\noverall\noverall, boilersuit, boilers_suit\novercoat, overcoating\noverdrive\novergarment, outer_garment\noverhand_knot\noverhang\noverhead_projector\novermantel\novernighter, overnight_bag, overnight_case\noverpass, flyover\noverride\novershoe\noverskirt\noxbow\nOxbridge\noxcart\noxeye\noxford\noximeter\noxyacetylene_torch\noxygen_mask\noyster_bar\noyster_bed, oyster_bank, oyster_park\npace_car\npacemaker, artificial_pacemaker\npack\npack\npack, face_pack\npackage, parcel\npackage_store, liquor_store, off-licence\npackaging\npacket\npacking_box, packing_case\npackinghouse, packing_plant\npackinghouse\npacking_needle\npacksaddle\npaddle, boat_paddle\npaddle\npaddle\npaddle_box, paddle-box\npaddle_steamer, paddle-wheeler\npaddlewheel, paddle_wheel\npaddock\npadlock\npage_printer, page-at-a-time_printer\npaint, pigment\npaintball\npaintball_gun\npaintbox\npaintbrush\npaisley\npajama, pyjama, pj's, jammies\npajama, pyjama\npalace\npalace, castle\npalace\npalanquin, palankeen\npaleolith\npalestra, palaestra\npalette, pallet\npalette_knife\npalisade\npallet\npallette, palette\npallium\npallium\npan\npan, cooking_pan\npancake_turner\npanchromatic_film\npanda_car\npaneling, panelling, pane\npanhandle\npanic_button\npannier\npannier\npannikin\npanopticon\npanopticon\npanpipe, pandean_pipe, syrinx\npantaloon\npantechnicon\npantheon\npantheon\npantie, panty, scanty, step-in\npanting, trousering\npant_leg, trouser_leg\npantograph\npantry, larder, buttery\npants_suit, pantsuit\npanty_girdle\npantyhose\npanzer\npaper_chain\npaper_clip, paperclip, gem_clip\npaper_cutter\npaper_fastener\npaper_feed\npaper_mill\npaper_towel\nparabolic_mirror\nparabolic_reflector, paraboloid_reflector\nparachute, chute\nparallel_bars, bars\nparallel_circuit, shunt_circuit\nparallel_interface, 
parallel_port\nparang\nparapet, breastwork\nparapet\nparasail\nparasol, sunshade\nparer, paring_knife\nparfait_glass\npargeting, pargetting, pargetry\npari-mutuel_machine, totalizer, totaliser, totalizator, totalisator\nparka, windbreaker, windcheater, anorak\npark_bench\nparking_meter\nparlor, parlour\nparquet, parquet_floor\nparquetry, parqueterie\nparsonage, vicarage, rectory\nParsons_table\npartial_denture\nparticle_detector\npartition, divider\nparts_bin\nparty_line\nparty_wall\nparvis\npassenger_car, coach, carriage\npassenger_ship\npassenger_train\npassenger_van\npasse-partout\npassive_matrix_display\npasskey, passe-partout, master_key, master\npass-through\npastry_cart\npatch\npatchcord\npatchouli, patchouly, pachouli\npatch_pocket\npatchwork, patchwork_quilt\npatent_log, screw_log, taffrail_log\npaternoster\npatina\npatio, terrace\npatisserie\npatka\npatrol_boat, patrol_ship\npatty-pan\npave\npavilion, marquee\npavior, paviour, paving_machine\npavis, pavise\npawn\npawnbroker's_shop, pawnshop, loan_office\npay-phone, pay-station\nPC_board\npeach_orchard\npea_jacket, peacoat\npeavey, peavy, cant_dog, dog_hook\npectoral, pectoral_medallion\npedal, treadle, foot_pedal, foot_lever\npedal_pusher, toreador_pants\npedestal, plinth, footstall\npedestal_table\npedestrian_crossing, zebra_crossing\npedicab, cycle_rickshaw\npediment\npedometer\npeeler\npeep_sight\npeg, nog\npeg, pin, thole, tholepin, rowlock, oarlock\npeg\npeg, wooden_leg, leg, pegleg\npegboard\nPelham\npelican_crossing\npelisse\npelvimeter\npen\npenal_colony\npenal_institution, penal_facility\npenalty_box\npen-and-ink\npencil\npencil\npencil_box, pencil_case\npencil_sharpener\npendant_earring, drop_earring, eardrop\npendulum\npendulum_clock\npendulum_watch\npenetration_bomb\npenile_implant\npenitentiary, pen\npenknife\npenlight\npennant, pennon, streamer, waft\npennywhistle, tin_whistle, whistle\npenthouse\npentode\npeplos, peplus, peplum\npeplum\npepper_mill, pepper_grinder\npepper_shaker, 
pepper_box, pepper_pot\npepper_spray\npercale\npercolator\npercussion_cap\npercussion_instrument, percussive_instrument\nperforation\nperfume, essence\nperfumery\nperfumery\nperfumery\nperipheral, computer_peripheral, peripheral_device\nperiscope\nperistyle\nperiwig, peruke\npermanent_press, durable_press\nperpetual_motion_machine\npersonal_computer, PC, microcomputer\npersonal_digital_assistant, PDA, personal_organizer, personal_organiser, organizer, organiser\npersonnel_carrier\npestle\npestle, muller, pounder\npetcock\nPetri_dish\npetrolatum_gauze\npet_shop\npetticoat, half-slip, underskirt\npew, church_bench\nphial, vial, ampule, ampul, ampoule\nPhillips_screw\nPhillips_screwdriver\nphonograph_needle, needle\nphonograph_record, phonograph_recording, record, disk, disc, platter\nphotocathode\nphotocoagulator\nphotocopier\nphotographic_equipment\nphotographic_paper, photographic_material\nphotometer\nphotomicrograph\nPhotostat, Photostat_machine\nphotostat\nphysical_pendulum, compound_pendulum\npiano, pianoforte, forte-piano\npiano_action\npiano_keyboard, fingerboard, clavier\npiano_wire\npiccolo\npick, pickax, pickaxe\npick\npick, plectrum, plectron\npickelhaube\npicket_boat\npicket_fence, paling\npicket_ship\npickle_barrel\npickup, pickup_truck\npicture, image, icon, ikon\npicture_frame\npicture_hat\npicture_rail\npicture_window\npiece_of_cloth, piece_of_material\npied-a-terre\npier\npier\npier_arch\npier_glass, pier_mirror\npier_table\npieta\npiezometer\npig_bed, pig\npiggery, pig_farm\npiggy_bank, penny_bank\npilaster\npile, spile, piling, stilt\npile_driver\npill_bottle\npillbox, toque, turban\npillion\npillory\npillow\npillow_block\npillow_lace, bobbin_lace\npillow_sham\npilot_bit\npilot_boat\npilot_burner, pilot_light, pilot\npilot_cloth\npilot_engine\npilothouse, wheelhouse\npilot_light, pilot_lamp, indicator_lamp\npin\npin, flag\npin, pin_tumbler\npinata\npinball_machine, pin_table\npince-nez\npincer, pair_of_pincers, tweezer, 
pair_of_tweezers\npinch_bar\npincurl_clip\npinfold\nping-pong_ball\npinhead\npinion\npinnacle\npinprick\npinstripe\npinstripe\npinstripe\npintle\npinwheel, pinwheel_wind_collector\npinwheel\ntabor_pipe\npipe\npipe_bomb\npipe_cleaner\npipe_cutter\npipefitting, pipe_fitting\npipet, pipette\npipe_vise, pipe_clamp\npipe_wrench, tube_wrench\npique\npirate, pirate_ship\npiste\npistol, handgun, side_arm, shooting_iron\npistol_grip\npiston, plunger\npiston_ring\npiston_rod\npit\npitcher, ewer\npitchfork\npitching_wedge\npitch_pipe\npith_hat, pith_helmet, sun_helmet, topee, topi\npiton\nPitot-static_tube, Pitot_head, Pitot_tube\nPitot_tube, Pitot\npitsaw\npivot, pin\npivoting_window\npizzeria, pizza_shop, pizza_parlor\nplace_of_business, business_establishment\nplace_of_worship, house_of_prayer, house_of_God, house_of_worship\nplacket\nplanchet, coin_blank\nplane, carpenter's_plane, woodworking_plane\nplane, planer, planing_machine\nplane_seat\nplanetarium\nplanetarium\nplanetarium\nplanetary_gear, epicyclic_gear, planet_wheel, planet_gear\nplank-bed\nplanking\nplanner\nplant, works, industrial_plant\nplanter\nplaster, adhesive_plaster, sticking_plaster\nplasterboard, gypsum_board\nplastering_trowel\nplastic_bag\nplastic_bomb\nplastic_laminate\nplastic_wrap\nplastron\nplastron\nplastron\nplate, scale, shell\nplate, collection_plate\nplate\nplaten\nplaten\nplate_rack\nplate_rail\nplatform\nplatform, weapons_platform\nplatform\nplatform_bed\nplatform_rocker\nplating, metal_plating\nplatter\nplayback\nplaybox, play-box\nplayground\nplaypen, pen\nplaysuit\nplaza, mall, center, shopping_mall, shopping_center, shopping_centre\npleat, plait\nplenum\nplethysmograph\npleximeter, plessimeter\nplexor, plessor, percussor\npliers, pair_of_pliers, plyers\nplimsoll\nplotter\nplow, plough\nplug, stopper, stopple\nplug, male_plug\nplug_fuse\nplughole\nplumb_bob, plumb, plummet\nplumb_level\nplunger, plumber's_helper\nplus_fours\nplush\nplywood, 
plyboard\npneumatic_drill\np-n_junction\np-n-p_transistor\npoacher\npocket\npocket_battleship\npocketcomb, pocket_comb\npocket_flap\npocket-handkerchief\npocketknife, pocket_knife\npocket_watch\npod, fuel_pod\npogo_stick\npoint-and-shoot_camera\npointed_arch\npointing_trowel\npoint_lace, needlepoint\npoker, stove_poker, fire_hook, salamander\npolarimeter, polariscope\nPolaroid\nPolaroid_camera, Polaroid_Land_camera\npole\npole\npoleax, poleaxe\npoleax, poleaxe\npolice_boat\npolice_van, police_wagon, paddy_wagon, patrol_wagon, wagon, black_Maria\npolling_booth\npolo_ball\npolo_mallet, polo_stick\npolonaise\npolo_shirt, sport_shirt\npolyester\npolygraph\npomade, pomatum\npommel_horse, side_horse\nponcho\npongee\nponiard, bodkin\npontifical\npontoon\npontoon_bridge, bateau_bridge, floating_bridge\npony_cart, ponycart, donkey_cart, tub-cart\npool_ball\npoolroom\npool_table, billiard_table, snooker_table\npoop_deck\npoor_box, alms_box, mite_box\npoorhouse\npop_bottle, soda_bottle\npopgun\npoplin\npopper\npoppet, poppet_valve\npop_tent\nporcelain\nporch\nporkpie, porkpie_hat\nporringer\nportable\nportable_computer\nportable_circular_saw, portable_saw\nportcullis\nporte-cochere\nporte-cochere\nportfolio\nporthole\nportico\nportiere\nportmanteau, Gladstone, Gladstone_bag\nportrait_camera\nportrait_lens\npositive_pole, positive_magnetic_pole, north-seeking_pole\npositive_pole\npositron_emission_tomography_scanner, PET_scanner\npost\npostage_meter\npost_and_lintel\npost_chaise\npostern\npost_exchange, PX\nposthole_digger, post-hole_digger\npost_horn\nposthouse, post_house\npot\npot, flowerpot\npotbelly, potbelly_stove\nPotemkin_village\npotential_divider, voltage_divider\npotentiometer, pot\npotentiometer\npotpourri\npotsherd\npotter's_wheel\npottery, clayware\npottle\npotty_seat, potty_chair\npouch\npoultice, cataplasm, plaster\npound, dog_pound\npound_net\npowder\npowder_and_shot\npowdered_mustard, dry_mustard\npowder_horn, 
powder_flask\npowder_keg\npower_brake\npower_cord\npower_drill\npower_line, power_cable\npower_loom\npower_mower, motor_mower\npower_pack\npower_saw, saw, sawing_machine\npower_shovel, excavator, digger, shovel\npower_steering, power-assisted_steering\npower_takeoff, PTO\npower_tool\npraetorium, pretorium\nprayer_rug, prayer_mat\nprayer_shawl, tallith, tallis\nprecipitator, electrostatic_precipitator, Cottrell_precipitator\nprefab\npresbytery\npresence_chamber\npress, mechanical_press\npress, printing_press\npress\npress_box\npress_gallery\npress_of_sail, press_of_canvas\npressure_cabin\npressure_cooker\npressure_dome\npressure_gauge, pressure_gage\npressurized_water_reactor, PWR\npressure_suit\npricket\nprie-dieu\nprimary_coil, primary_winding, primary\nPrimus_stove, Primus\nPrince_Albert\nprint\nprint_buffer\nprinted_circuit\nprinter, printing_machine\nprinter\nprinter_cable\npriory\nprison, prison_house\nprison_camp, internment_camp, prisoner_of_war_camp, POW_camp\nprivateer\nprivate_line\nprivet_hedge\nprobe\nproctoscope\nprod, goad\nproduction_line, assembly_line, line\nprojectile, missile\nprojector\nprojector\nprolonge\nprolonge_knot, sailor's_breastplate\nprompter, autocue\nprong\npropeller, propellor\npropeller_plane\npropjet, turboprop, turbo-propeller_plane\nproportional_counter_tube, proportional_counter\npropulsion_system\nproscenium, proscenium_wall\nproscenium_arch\nprosthesis, prosthetic_device\nprotective_covering, protective_cover, protection\nprotective_garment\nproton_accelerator\nprotractor\npruner, pruning_hook, lopper\npruning_knife\npruning_saw\npruning_shears\npsaltery\npsychrometer\nPT_boat, mosquito_boat, mosquito_craft, motor_torpedo_boat\npublic_address_system, P.A._system, PA_system, P.A., PA\npublic_house, pub, saloon, pothouse, gin_mill, taphouse\npublic_toilet, comfort_station, public_convenience, convenience, public_lavatory, restroom, toilet_facility, wash_room\npublic_transport\npublic_works\npuck, hockey_puck\npull\npullback, 
tieback\npull_chain\npulley, pulley-block, pulley_block, block\npull-off, rest_area, rest_stop, layby, lay-by\nPullman, Pullman_car\npullover, slipover\npull-through\npulse_counter\npulse_generator\npulse_timing_circuit\npump\npump\npump_action, slide_action\npump_house, pumping_station\npump_room\npump-type_pliers\npump_well\npunch, puncher\npunchboard\npunch_bowl\npunching_bag, punch_bag, punching_ball, punchball\npunch_pliers\npunch_press\npunnet\npunt\npup_tent, shelter_tent\npurdah\npurifier\npurl, purl_stitch\npurse\npush-bike\npush_broom\npush_button, push, button\npush-button_radio\npusher, zori\nput-put\nputtee\nputter, putting_iron\nputty_knife\npuzzle\npylon, power_pylon\npylon\npyramidal_tent\npyrograph\npyrometer\npyrometric_cone\npyrostat\npyx, pix\npyx, pix, pyx_chest, pix_chest\npyxis\nquad, quadrangle\nquadrant\nquadraphony, quadraphonic_system, quadriphonic_system\nquartering\nquarterstaff\nquartz_battery, quartz_mill\nquartz_lamp\nqueen\nqueen\nqueen_post\nquern\nquill, quill_pen\nquilt, comforter, comfort, puff\nquilted_bedspread\nquilting\nquipu\nquirk_molding, quirk_moulding\nquirt\nquiver\nquoin, coign, coigne\nquoit\nQWERTY_keyboard\nrabbet, rebate\nrabbet_joint\nrabbit_ears\nrabbit_hutch\nraceabout\nracer, race_car, racing_car\nraceway, race\nracing_boat\nracing_gig\nracing_skiff, single_shell\nrack, stand\nrack\nrack, wheel\nrack_and_pinion\nracket, racquet\nracquetball\nradar, microwave_radar, radio_detection_and_ranging, radiolocation\nradial, radial_tire, radial-ply_tire\nradial_engine, rotary_engine\nradiation_pyrometer\nradiator\nradiator\nradiator_cap\nradiator_hose\nradio, wireless\nradio_antenna, radio_aerial\nradio_chassis\nradio_compass\nradiogram, radiograph, shadowgraph, skiagraph, skiagram\nradio_interferometer\nradio_link, link\nradiometer\nradiomicrometer\nradio-phonograph, radio-gramophone\nradio_receiver, receiving_set, radio_set, radio, tuner, wireless\nradiotelegraph, radiotelegraphy, wireless_telegraph, 
wireless_telegraphy\nradiotelephone, radiophone, wireless_telephone\nradio_telescope, radio_reflector\nradiotherapy_equipment\nradio_transmitter\nradome, radar_dome\nraft\nrafter, balk, baulk\nraft_foundation\nrag, shred, tag, tag_end, tatter\nragbag\nraglan\nraglan_sleeve\nrail\nrail_fence\nrailhead\nrailing, rail\nrailing\nrailroad_bed\nrailroad_tunnel\nrain_barrel\nraincoat, waterproof\nrain_gauge, rain_gage, pluviometer, udometer\nrain_stick\nrake\nrake_handle\nRAM_disk\nramekin, ramequin\nramjet, ramjet_engine, atherodyde, athodyd, flying_drainpipe\nrammer\nramp, incline\nrampant_arch\nrampart, bulwark, wall\nramrod\nramrod\nranch, spread, cattle_ranch, cattle_farm\nranch_house\nrandom-access_memory, random_access_memory, random_memory, RAM, read/write_memory\nrangefinder, range_finder\nrange_hood\nrange_pole, ranging_pole, flagpole\nrapier, tuck\nrariora\nrasp, wood_file\nratchet, rachet, ratch\nratchet_wheel\nrathskeller\nratline, ratlin\nrat-tail_file\nrattan, ratan\nrattrap\nrayon\nrazor\nrazorblade\nreaction-propulsion_engine, reaction_engine\nreaction_turbine\nreactor\nreading_lamp\nreading_room\nread-only_memory, ROM, read-only_storage, fixed_storage\nread-only_memory_chip\nreadout, read-out\nread/write_head, head\nready-to-wear\nreal_storage\nreamer\nreamer, juicer, juice_reamer\nrearview_mirror\nReaumur_thermometer\nrebozo\nreceiver, receiving_system\nreceptacle\nreception_desk\nreception_room\nrecess, niche\nreciprocating_engine\nrecliner, reclining_chair, lounger\nreconnaissance_plane\nreconnaissance_vehicle, scout_car\nrecord_changer, auto-changer, changer\nrecorder, recording_equipment, recording_machine\nrecording\nrecording_system\nrecord_player, phonograph\nrecord_sleeve, record_cover\nrecovery_room\nrecreational_vehicle, RV, R.V.\nrecreation_room, rec_room\nrecycling_bin\nrecycling_plant\nredbrick_university\nred_carpet\nredoubt\nredoubt\nreduction_gear\nreed_pipe\nreed_stop\nreef_knot, 
flat_knot\nreel\nreel\nrefectory\nrefectory_table\nrefinery\nreflecting_telescope, reflector\nreflectometer\nreflector\nreflex_camera\nreflux_condenser\nreformatory, reform_school, training_school\nreformer\nrefracting_telescope\nrefractometer\nrefrigeration_system\nrefrigerator, icebox\nrefrigerator_car\nrefuge, sanctuary, asylum\nregalia\nregimentals\nregulator\nrein\nrelay, electrical_relay\nrelease, button\nreligious_residence, cloister\nreliquary\nremote_control, remote\nremote_terminal, link-attached_terminal, remote_station, link-attached_station\nremovable_disk\nrendering\nrep, repp\nrepair_shop, fix-it_shop\nrepeater\nrepeating_firearm, repeater\nrepository, monument\nreproducer\nrerebrace, upper_cannon\nrescue_equipment\nresearch_center, research_facility\nreseau\nreservoir\nreset\nreset_button\nresidence\nresistance_pyrometer\nresistor, resistance\nresonator\nresonator, cavity_resonator, resonating_chamber\nresort_hotel, spa\nrespirator, inhalator\nrestaurant, eating_house, eating_place, eatery\nrest_house\nrestraint, constraint\nresuscitator\nretainer\nretaining_wall\nreticle, reticule, graticule\nreticulation\nreticule\nretort\nretractor\nreturn_key, return\nreverberatory_furnace\nrevers, revere\nreverse, reverse_gear\nreversible\nrevetment, revetement, stone_facing\nrevetment\nrevolver, six-gun, six-shooter\nrevolving_door, revolver\nrheometer\nrheostat, variable_resistor\nrhinoscope\nrib\nriband, ribband\nribbed_vault\nribbing\nribbon_development\nrib_joint_pliers\nricer\nriddle\nride\nridge, ridgepole, rooftree\nridge_rope\nriding_boot\nriding_crop, hunting_crop\nriding_mower\nrifle\nrifle_ball\nrifle_grenade\nrig\nrigger, rigger_brush\nrigger\nrigging, tackle\nrigout\nringlet\nrings\nrink, skating_rink\nriot_gun\nripcord\nripcord\nripping_bar\nripping_chisel\nripsaw, splitsaw\nriser\nriser, riser_pipe, riser_pipeline, riser_main\nRitz\nriver_boat\nrivet\nriveting_machine, riveter, rivetter\nroach_clip, roach_holder\nroad, route\nroadbed\nroadblock, 
barricade\nroadhouse\nroadster, runabout, two-seater\nroadway\nroaster\nrobe\nrobotics_equipment\nRochon_prism, Wollaston_prism\nrock_bit, roller_bit\nrocker\nrocker, cradle\nrocker_arm, valve_rocker\nrocket, rocket_engine\nrocket, projectile\nrocking_chair, rocker\nrod\nrodeo\nroll\nroller\nroller\nroller_bandage\nin-line_skate\nRollerblade\nroller_blind\nroller_coaster, big_dipper, chute-the-chute\nroller_skate\nroller_towel\nroll_film\nrolling_hitch\nrolling_mill\nrolling_pin\nrolling_stock\nroll-on\nroll-on\nroll-on_roll-off\nRolodex\nRoman_arch, semicircular_arch\nRoman_building\nromper, romper_suit\nrood_screen\nroof\nroof\nroofing\nroom\nroomette\nroom_light\nroost\nrope\nrope_bridge\nrope_tow\nrose_water\nrose_window, rosette\nrosin_bag\nrotary_actuator, positioner\nrotary_engine\nrotary_press\nrotating_mechanism\nrotating_shaft, shaft\nrotisserie\nrotisserie\nrotor\nrotor, rotor_coil\nrotor\nrotor_blade, rotary_wing\nrotor_head, rotor_shaft\nrotunda\nrotunda\nrouge, paint, blusher\nroughcast\nrouleau\nroulette, toothed_wheel\nroulette_ball\nroulette_wheel, wheel\nround, unit_of_ammunition, one_shot\nround_arch\nround-bottom_flask\nroundel\nround_file\nroundhouse\nrouter\nrouter\nrouter_plane\nrowel\nrow_house, town_house\nrowing_boat\nrowlock_arch\nroyal\nroyal_mast\nrubber_band, elastic_band, elastic\nrubber_boot, gum_boot\nrubber_bullet\nrubber_eraser, rubber, pencil_eraser\nrudder\nrudder\nrudder_blade\nrug, carpet, carpeting\nrugby_ball\nruin\nrule, ruler\nrumble\nrumble_seat\nrummer\nrumpus_room, playroom, game_room\nruncible_spoon\nrundle, spoke, rung\nrunning_shoe\nrunning_suit\nrunway\nrushlight, rush_candle\nrusset\nrya, rya_rug\nsaber, sabre\nsaber_saw, jigsaw, reciprocating_saw\nsable\nsable, sable_brush, sable's_hair_pencil\nsable_coat\nsabot, wooden_shoe\nsachet\nsack, poke, paper_bag, carrier_bag\nsack, sacque\nsackbut\nsackcloth\nsackcloth\nsack_coat\nsacking, bagging\nsaddle\nsaddlebag\nsaddle_blanket, saddlecloth, 
horse_blanket\nsaddle_oxford, saddle_shoe\nsaddlery\nsaddle_seat\nsaddle_stitch\nsafe\nsafe\nsafe-deposit, safe-deposit_box, safety-deposit, safety_deposit_box, deposit_box, lockbox\nsafe_house\nsafety_arch\nsafety_belt, life_belt, safety_harness\nsafety_bicycle, safety_bike\nsafety_bolt, safety_lock\nsafety_curtain\nsafety_fuse\nsafety_lamp, Davy_lamp\nsafety_match, book_matches\nsafety_net\nsafety_pin\nsafety_rail, guardrail\nsafety_razor\nsafety_valve, relief_valve, escape_valve, escape_cock, escape\nsail, canvas, canvass, sheet\nsail\nsailboat, sailing_boat\nsailcloth\nsailing_vessel, sailing_ship\nsailing_warship\nsailor_cap\nsailor_suit\nsalad_bar\nsalad_bowl\nsalinometer\nsallet, salade\nsalon\nsalon\nsalon, beauty_salon, beauty_parlor, beauty_parlour, beauty_shop\nsaltbox\nsaltcellar\nsaltshaker, salt_shaker\nsaltworks\nsalver\nsalwar, shalwar\nSam_Browne_belt\nsamisen, shamisen\nsamite\nsamovar\nsampan\nsandal\nsandbag\nsandblaster\nsandbox\nsandglass\nsand_wedge\nsandwich_board\nsanitary_napkin, sanitary_towel, Kotex\ncling_film, clingfilm, Saran_Wrap\nsarcenet, sarsenet\nsarcophagus\nsari, saree\nsarong\nsash, window_sash\nsash_fastener, sash_lock, window_lock\nsash_window\nsatchel\nsateen\nsatellite, artificial_satellite, orbiter\nsatellite_receiver\nsatellite_television, satellite_TV\nsatellite_transmitter\nsatin\nSaturday_night_special\nsaucepan\nsaucepot\nsauna, sweat_room\nsavings_bank, coin_bank, money_box, bank\nsaw\nsawed-off_shotgun\nsawhorse, horse, sawbuck, buck\nsawmill\nsaw_set\nsax, saxophone\nsaxhorn\nscabbard\nscaffolding, staging\nscale\nscale, weighing_machine\nscaler\nscaling_ladder\nscalpel\nscanner, electronic_scanner\nscanner\nscanner, digital_scanner, image_scanner\nscantling, stud\nscarf\nscarf_joint, scarf\nscatter_rug, throw_rug\nscauper, scorper\nSchmidt_telescope, Schmidt_camera\nschool, schoolhouse\nschoolbag\nschool_bell\nschool_bus\nschool_ship, 
training_ship\nschool_system\nschooner\nschooner\nscientific_instrument\nscimitar\nscintillation_counter\nscissors, pair_of_scissors\nsclerometer\nscoinson_arch, sconcheon_arch\nsconce\nsconce\nscoop\nscooter\nscoreboard\nscouring_pad\nscow\nscow\nscraper\nscratcher\nscreen\nscreen, cover, covert, concealment\nscreen\nscreen, CRT_screen\nscreen_door, screen\nscreening\nscrew\nscrew, screw_propeller\nscrew\nscrewdriver\nscrew_eye\nscrew_key\nscrew_thread, thread\nscrewtop\nscrew_wrench\nscriber, scribe, scratch_awl\nscrim\nscrimshaw\nscriptorium\nscrubber\nscrub_brush, scrubbing_brush, scrubber\nscrub_plane\nscuffer\nscuffle, scuffle_hoe, Dutch_hoe\nscull\nscull\nscullery\nsculpture\nscuttle, coal_scuttle\nscyphus\nscythe\nseabag\nsea_boat\nsea_chest\nsealing_wax, seal\nsealskin\nseam\nseaplane, hydroplane\nsearchlight\nsearing_iron\nseat\nseat\nseat\nseat_belt, seatbelt\nsecateurs\nsecondary_coil, secondary_winding, secondary\nsecond_balcony, family_circle, upper_balcony, peanut_gallery\nsecond_base\nsecond_hand\nsecretary, writing_table, escritoire, secretaire\nsectional\nsecurity_blanket\nsecurity_system, security_measure, security\nsecurity_system\nsedan, saloon\nsedan, sedan_chair\nseeder\nseeker\nseersucker\nsegmental_arch\nSegway, Segway_Human_Transporter, Segway_HT\nseidel\nseine\nseismograph\nselector, selector_switch\nselenium_cell\nself-propelled_vehicle\nself-registering_thermometer\nself-starter\nselsyn, synchro\nselvage, selvedge\nsemaphore\nsemiautomatic_firearm\nsemiautomatic_pistol, semiautomatic\nsemiconductor_device, semiconductor_unit, semiconductor\nsemi-detached_house\nsemigloss\nsemitrailer, semi\nsennit\nsensitometer\nsentry_box\nseparate\nseptic_tank\nsequence, episode\nsequencer, sequenator\nserape, sarape\nserge\nserger\nserial_port\nserpent\nserration\nserver\nserver, host\nservice_club\nserving_cart\nserving_dish\nservo, servomechanism, servosystem\nset\nset_gun, spring_gun\nsetscrew\nsetscrew\nset_square\nsettee\nsettle, 
settee\nsettlement_house\nseventy-eight, 78\nSeven_Wonders_of_the_Ancient_World, Seven_Wonders_of_the_World\nsewage_disposal_plant, disposal_plant\nsewer, sewerage, cloaca\nsewing_basket\nsewing_kit\nsewing_machine\nsewing_needle\nsewing_room\nsextant\nsgraffito\nshackle, bond, hamper, trammel\nshackle\nshade\nshadow_box\nshaft\nshag_rug\nshaker\nshank\nshank, stem\nshantung\nshaper, shaping_machine\nshaping_tool\nsharkskin\nsharpener\nSharpie\nshaver, electric_shaver, electric_razor\nshaving_brush\nshaving_cream, shaving_soap\nshaving_foam\nshawl\nshawm\nshears\nsheath\nsheathing, overlay, overlayer\nshed\nsheep_bell\nsheepshank\nsheepskin_coat, afghan\nsheepwalk, sheeprun\nsheet, bed_sheet\nsheet_bend, becket_bend, weaver's_knot, weaver's_hitch\nsheeting\nsheet_pile, sheath_pile, sheet_piling\nSheetrock\nshelf\nshelf_bracket\nshell\nshell, case, casing\nshell, racing_shell\nshellac, shellac_varnish\nshelter\nshelter\nshelter\nsheltered_workshop\nSheraton\nshield, buckler\nshield\nshielding\nshift_key, shift\nshillelagh, shillalah\nshim\nshingle\nshin_guard, shinpad\nship\nshipboard_system\nshipping, cargo_ships, merchant_marine, merchant_vessels\nshipping_room\nship-towed_long-range_acoustic_detection_system\nshipwreck\nshirt\nshirt_button\nshirtdress\nshirtfront\nshirting\nshirtsleeve\nshirttail\nshirtwaist, shirtwaister\nshiv\nshock_absorber, shock, cushion\nshoe\nshoe\nshoebox\nshoehorn\nshoe_shop, shoe-shop, shoe_store\nshoetree\nshofar, shophar\nshoji\nshooting_brake\nshooting_lodge, shooting_box\nshooting_stick\nshop, store\nshop_bell\nshopping_bag\nshopping_basket\nshopping_cart\nshort_circuit, short\nshort_iron\nshort_pants, shorts, trunks\nshort_sleeve\nshortwave_diathermy_machine\nshot\nshot_glass, jigger, pony\nshotgun, scattergun\nshotgun_shell\nshot_tower\nshoulder\nshoulder_bag\nshouldered_arch\nshoulder_holster\nshoulder_pad\nshoulder_patch\nshovel\nshovel\nshovel_hat\nshowboat\nshower\nshower_cap\nshower_curtain\nshower_room\nshower_stall, 
shower_bath\nshowroom, salesroom, saleroom\nshrapnel\nshredder\nshrimper\nshrine\nshrink-wrap\nshunt\nshunt, electrical_shunt, bypass\nshunter\nshutter\nshutter\nshuttle\nshuttle\nshuttle_bus\nshuttlecock, bird, birdie, shuttle\nshuttle_helicopter\nSibley_tent\nsickbay, sick_berth\nsickbed\nsickle, reaping_hook, reap_hook\nsickroom\nsideboard\nsidecar\nside_chapel\nsidelight, running_light\nsidesaddle\nsidewalk, pavement\nsidewall\nside-wheeler\nsidewinder\nsieve, screen\nsifter\nsights\nsigmoidoscope, flexible_sigmoidoscope\nsignal_box, signal_tower\nsignaling_device\nsignboard, sign\nsilencer, muffler\nsilent_butler\nSilex\nsilk\nsilks\nsilo\nsilver_plate\nsilverpoint\nsimple_pendulum\nsimulator\nsingle_bed\nsingle-breasted_jacket\nsingle-breasted_suit\nsingle_prop, single-propeller_plane\nsingle-reed_instrument, single-reed_woodwind\nsingle-rotor_helicopter\nsinglestick, fencing_stick, backsword\nsinglet, vest, undershirt\nsiren\nsister_ship\nsitar\nsitz_bath, hip_bath\nsix-pack, six_pack, sixpack\nskate\nskateboard\nskeg\nskein\nskeleton, skeletal_frame, frame, underframe\nskeleton_key\nskep\nskep\nsketch, study\nsketcher\nskew_arch\nskewer\nski\nski_binding, binding\nskibob\nski_boot\nski_cap, stocking_cap, toboggan_cap\nskidder\nskid_lid\nskiff\nski_jump\nski_lodge\nski_mask\nskimmer\nski_parka, ski_jacket\nski-plane\nski_pole\nski_rack\nskirt\nskirt\nski_tow, ski_lift, lift\nSkivvies\nskullcap\nskybox\nskyhook\nskylight, fanlight\nskysail\nskyscraper\nskywalk\nslacks\nslack_suit\nslasher\nslash_pocket\nslat, spline\nslate\nslate_pencil\nslate_roof\nsled, sledge, sleigh\nsleeper\nsleeper\nsleeping_bag\nsleeping_car, sleeper, wagon-lit\nsleeve, arm\nsleeve\nsleigh_bed\nsleigh_bell, cascabel\nslice_bar\nslicer\nslicer\nslide, playground_slide, sliding_board\nslide_fastener, zip, zipper, zip_fastener\nslide_projector\nslide_rule, slipstick\nslide_valve\nsliding_door\nsliding_seat\nsliding_window\nsling, scarf_bandage, triangular_bandage\nsling\nslingback, 
sling\nslinger_ring\nslip_clutch, slip_friction_clutch\nslipcover\nslip-joint_pliers\nslipknot\nslip-on\nslipper, carpet_slipper\nslip_ring\nslit_lamp\nslit_trench\nsloop\nsloop_of_war\nslop_basin, slop_bowl\nslop_pail, slop_jar\nslops\nslopshop, slopseller's_shop\nslot, one-armed_bandit\nslot_machine, coin_machine\nsluice, sluiceway, penstock\nsmack\nsmall_boat\nsmall_computer_system_interface, SCSI\nsmall_ship\nsmall_stores\nsmart_bomb\nsmelling_bottle\nsmocking\nsmoke_bomb, smoke_grenade\nsmokehouse, meat_house\nsmoker, smoking_car, smoking_carriage, smoking_compartment\nsmoke_screen, smokescreen\nsmoking_room\nsmoothbore\nsmooth_plane, smoothing_plane\nsnack_bar, snack_counter, buffet\nsnaffle, snaffle_bit\nsnap, snap_fastener, press_stud\nsnap_brim\nsnap-brim_hat\nsnare, gin, noose\nsnare_drum, snare, side_drum\nsnatch_block\nsnifter, brandy_snifter, brandy_glass\nsniper_rifle, precision_rifle\nsnips, tinsnips\nSno-cat\nsnood\nsnorkel, schnorkel, schnorchel, snorkel_breather, breather\nsnorkel\nsnowbank, snow_bank\nsnowboard\nsnowmobile\nsnowplow, snowplough\nsnowshoe\nsnowsuit\nsnow_thrower, snow_blower\nsnuffbox\nsnuffer\nsnuffers\nsoapbox\nsoap_dish\nsoap_dispenser\nsoap_pad\nsoccer_ball\nsock\nsocket\nsocket_wrench\nsocle\nsoda_can\nsoda_fountain\nsoda_fountain\nsod_house, soddy, adobe_house\nsodium-vapor_lamp, sodium-vapour_lamp\nsofa, couch, lounge\nsoffit\nsoftball, playground_ball\nsoft_pedal\nsoil_pipe\nsolar_array, solar_battery, solar_panel\nsolar_cell, photovoltaic_cell\nsolar_dish, solar_collector, solar_furnace\nsolar_heater\nsolar_house\nsolar_telescope\nsolar_thermal_system\nsoldering_iron\nsolenoid\nsolleret, sabaton\nsombrero\nsonic_depth_finder, fathometer\nsonogram, echogram\nsonograph\nsorter\nsouk\nsound_bow\nsoundbox, body\nsound_camera\nsounder\nsound_film\nsounding_board, soundboard\nsounding_rocket\nsound_recording, audio_recording, audio\nsound_spectrograph\nsoup_bowl\nsoup_ladle\nsoupspoon, 
soup_spoon\nsource_of_illumination\nsourdine\nsoutache\nsoutane\nsou'wester\nsoybean_future\nspace_bar\nspace_capsule, capsule\nspacecraft, ballistic_capsule, space_vehicle\nspace_heater\nspace_helmet\nspace_rocket\nspace_shuttle\nspace_station, space_platform, space_laboratory\nspacesuit\nspade\nspade_bit\nspaghetti_junction\nSpandau\nspandex\nspandrel, spandril\nspanker\nspar\nsparge_pipe\nspark_arrester, sparker\nspark_arrester\nspark_chamber, spark_counter\nspark_coil\nspark_gap\nspark_lever\nspark_plug, sparking_plug, plug\nsparkplug_wrench\nspark_transmitter\nspat, gaiter\nspatula\nspatula\nspeakerphone\nspeaking_trumpet\nspear, lance, shaft\nspear, gig, fizgig, fishgig, lance\nspecialty_store\nspecimen_bottle\nspectacle\nspectacles, specs, eyeglasses, glasses\nspectator_pump, spectator\nspectrograph\nspectrophotometer\nspectroscope, prism_spectroscope\nspeculum\nspeedboat\nspeed_bump\nspeedometer, speed_indicator\nspeed_skate, racing_skate\nspherometer\nsphygmomanometer\nspicemill\nspice_rack\nspider\nspider_web, spider's_web\nspike\nspike\nspindle\nspindle, mandrel, mandril, arbor\nspindle\nspin_dryer, spin_drier\nspinet\nspinet\nspinnaker\nspinner\nspinning_frame\nspinning_jenny\nspinning_machine\nspinning_rod\nspinning_wheel\nspiral_bandage\nspiral_ratchet_screwdriver, ratchet_screwdriver\nspiral_spring\nspirit_lamp\nspirit_stove\nspirometer\nspit\nspittoon, cuspidor\nsplashboard, splasher, dashboard\nsplasher\nsplice, splicing\nsplicer\nsplint\nsplit_rail, fence_rail\nSpode\nspoiler\nspoiler\nspoke, wheel_spoke, radius\nspokeshave\nsponge_cloth\nsponge_mop\nspoon\nspoon\nSpork\nsporran\nsport_kite, stunt_kite\nsports_car, sport_car\nsports_equipment\nsports_implement\nsportswear, athletic_wear, activewear\nsport_utility, sport_utility_vehicle, S.U.V., SUV\nspot\nspotlight, spot\nspot_weld, spot-weld\nspouter\nsprag\nspray_gun\nspray_paint\nspreader\nsprig\nspring\nspring_balance, 
spring_scale\nspringboard\nsprinkler\nsprinkler_system\nsprit\nspritsail\nsprocket, sprocket_wheel\nsprocket\nspun_yarn\nspur, gad\nspur_gear, spur_wheel\nsputnik\nspy_satellite\nsquad_room\nsquare\nsquare_knot\nsquare-rigger\nsquare_sail\nsquash_ball\nsquash_racket, squash_racquet, bat\nsquawk_box, squawker, intercom_speaker\nsqueegee\nsqueezer\nsquelch_circuit, squelch, squelcher\nsquinch\nstabilizer, stabiliser\nstabilizer\nstabilizer_bar, anti-sway_bar\nstable, stalls, horse_barn\nstable_gear, saddlery, tack\nstabling\nstacks\nstaddle\nstadium, bowl, arena, sports_stadium\nstage\nstagecoach, stage\nstained-glass_window\nstair-carpet\nstair-rod\nstairwell\nstake\nstall, stand, sales_booth\nstall\nstamp\nstamp_mill, stamping_mill\nstamping_machine, stamper\nstanchion\nstand\nstandard\nstandard_cell\nstandard_transmission, stick_shift\nstanding_press\nstanhope\nStanley_Steamer\nstaple\nstaple\nstaple_gun, staplegun, tacker\nstapler, stapling_machine\nstarship, spaceship\nstarter, starter_motor, starting_motor\nstarting_gate, starting_stall\nStassano_furnace, electric-arc_furnace\nStatehouse\nstately_home\nstate_prison\nstateroom\nstatic_tube\nstation\nstator, stator_coil\nstatue\nstay\nstaysail\nsteakhouse, chophouse\nsteak_knife\nstealth_aircraft\nstealth_bomber\nstealth_fighter\nsteam_bath, steam_room, vapor_bath, vapour_bath\nsteamboat\nsteam_chest\nsteam_engine\nsteamer, steamship\nsteamer\nsteam_iron\nsteam_locomotive\nsteamroller, road_roller\nsteam_shovel\nsteam_turbine\nsteam_whistle\nsteel\nsteel_arch_bridge\nsteel_drum\nsteel_mill, steelworks, steel_plant, steel_factory\nsteel-wool_pad\nsteelyard, lever_scale, beam_scale\nsteeple, spire\nsteerage\nsteering_gear\nsteering_linkage\nsteering_system, steering_mechanism\nsteering_wheel, wheel\nstele, stela\nstem-winder\nstencil\nSten_gun\nstenograph\nstep, stair\nstep-down_transformer\nstep_stool\nstep-up_transformer\nstereo, stereophony, stereo_system, 
stereophonic_system\nstereoscope\nstern_chaser\nsternpost\nsternwheeler\nstethoscope\nstewing_pan, stewpan\nstick\nstick\nstick, control_stick, joystick\nstick\nstile\nstiletto\nstill\nstillroom, still_room\nStillson_wrench\nstilt\nStinger\nstink_bomb, stench_bomb\nstirrer\nstirrup, stirrup_iron\nstirrup_pump\nstob\nstock, gunstock\nstockade\nstockcar\nstock_car\nstockinet, stockinette\nstocking\nstock-in-trade\nstockpot\nstockroom, stock_room\nstocks\nstock_saddle, Western_saddle\nstockyard\nstole\nstomacher\nstomach_pump\nstone_wall\nstoneware\nstonework\nstool\nstoop, stoep\nstop_bath, short-stop, short-stop_bath\nstopcock, cock, turncock\nstopper_knot\nstopwatch, stop_watch\nstorage_battery, accumulator\nstorage_cell, secondary_cell\nstorage_ring\nstorage_space\nstoreroom, storage_room, stowage\nstorm_cellar, cyclone_cellar, tornado_cellar\nstorm_door\nstorm_window, storm_sash\nstoup, stoop\nstoup\nstove\nstove, kitchen_stove, range, kitchen_range, cooking_stove\nstove_bolt\nstovepipe\nstovepipe_iron\nStradavarius, Strad\nstraight_chair, side_chair\nstraightedge\nstraightener\nstraight_flute, straight-fluted_drill\nstraight_pin\nstraight_razor\nstrainer\nstraitjacket, straightjacket\nstrap\nstrap\nstrap_hinge, joint_hinge\nstrapless\nstreamer_fly\nstreamliner\nstreet\nstreet\nstreetcar, tram, tramcar, trolley, trolley_car\nstreet_clothes\nstreetlight, street_lamp\nstretcher\nstretcher\nstretch_pants\nstrickle\nstrickle\nstringed_instrument\nstringer\nstringer\nstring_tie\nstrip\nstrip_lighting\nstrip_mall\nstroboscope, strobe, strobe_light\nstrongbox, deedbox\nstronghold, fastness\nstrongroom\nstrop\nstructural_member\nstructure, construction\nstudent_center\nstudent_lamp\nstudent_union\nstud_finder\nstudio_apartment, studio\nstudio_couch, day_bed\nstudy\nstudy_hall\nstuffing_nut, packing_nut\nstump\nstun_gun, stun_baton\nstupa, tope\nsty, pigsty, pigpen\nstylus, style\nstylus\nsub-assembly\nsubcompact, subcompact_car\nsubmachine_gun\nsubmarine, pigboat, sub, 
U-boat\nsubmarine_torpedo\nsubmersible, submersible_warship\nsubmersible\nsubtracter\nsubway_token\nsubway_train\nsubwoofer\nsuction_cup\nsuction_pump\nsudatorium, sudatory\nsuede_cloth, suede\nsugar_bowl\nsugar_refinery\nsugar_spoon, sugar_shell\nsuit, suit_of_clothes\nsuite, rooms\nsuiting\nsulky\nsummer_house\nsumo_ring\nsump\nsump_pump\nsunbonnet\nSunday_best, Sunday_clothes\nsun_deck\nsundial\nsundress\nsundries\nsun_gear\nsunglass\nsunglasses, dark_glasses, shades\nsunhat, sun_hat\nsunlamp, sun_lamp, sunray_lamp, sun-ray_lamp\nsun_parlor, sun_parlour, sun_porch, sunporch, sunroom, sun_lounge, solarium\nsunroof, sunshine-roof\nsunscreen, sunblock, sun_blocker\nsunsuit\nsupercharger\nsupercomputer\nsuperconducting_supercollider\nsuperhighway, information_superhighway\nsupermarket\nsuperstructure\nsupertanker\nsupper_club\nsupplejack\nsupply_chamber\nsupply_closet\nsupport\nsupport\nsupport_column\nsupport_hose, support_stocking\nsupporting_structure\nsupporting_tower\nsurcoat\nsurface_gauge, surface_gage, scribing_block\nsurface_lift\nsurface_search_radar\nsurface_ship\nsurface-to-air_missile, SAM\nsurface-to-air_missile_system\nsurfboat\nsurcoat\nsurgeon's_knot\nsurgery\nsurge_suppressor, surge_protector, spike_suppressor, spike_arrester, lightning_arrester\nsurgical_dressing\nsurgical_instrument\nsurgical_knife\nsurplice\nsurrey\nsurtout\nsurveillance_system\nsurveying_instrument, surveyor's_instrument\nsurveyor's_level\nsushi_bar\nsuspension, suspension_system\nsuspension_bridge\nsuspensory, suspensory_bandage\nsustaining_pedal, loud_pedal\nsuture, surgical_seam\nswab, swob, mop\nswab\nswaddling_clothes, swaddling_bands\nswag\nswage_block\nswagger_stick\nswallow-tailed_coat, swallowtail, morning_coat\nswamp_buggy, marsh_buggy\nswan's_down\nswathe, wrapping\nswatter, flyswatter, flyswat\nsweat_bag\nsweatband\nsweater, jumper\nsweat_pants, sweatpants\nsweatshirt\nsweatshop\nsweat_suit, sweatsuit, sweats, workout_suit\nsweep, sweep_oar\nsweep_hand, 
sweep-second\nswimming_trunks, bathing_trunks\nswimsuit, swimwear, bathing_suit, swimming_costume, bathing_costume\nswing\nswing_door, swinging_door\nswitch, electric_switch, electrical_switch\nswitchblade, switchblade_knife, flick-knife, flick_knife\nswitch_engine, donkey_engine\nswivel\nswivel_chair\nswizzle_stick\nsword, blade, brand, steel\nsword_cane, sword_stick\nS_wrench\nsynagogue, temple, tabernacle\nsynchrocyclotron\nsynchroflash\nsynchromesh\nsynchronous_converter, rotary, rotary_converter\nsynchronous_motor\nsynchrotron\nsynchroscope, synchronoscope, synchronizer, synchroniser\nsynthesizer, synthesiser\nsyringe\nsystem\ntabard\nTabernacle\ntabi, tabis\ntab_key, tab\ntable\ntable\ntablefork\ntable_knife\ntable_lamp\ntable_saw\ntablespoon\ntablet-armed_chair\ntable-tennis_table, ping-pong_table, pingpong_table\ntable-tennis_racquet, table-tennis_bat, pingpong_paddle\ntabletop\ntableware\ntabor, tabour\ntaboret, tabouret\ntachistoscope, t-scope\ntachograph\ntachometer, tach\ntachymeter, tacheometer\ntack\ntack_hammer\ntaffeta\ntaffrail\ntailgate, tailboard\ntaillight, tail_lamp, rear_light, rear_lamp\ntailor-made\ntailor's_chalk\ntailpipe\ntail_rotor, anti-torque_rotor\ntailstock\ntake-up\ntalaria\ntalcum, talcum_powder\ntam, tam-o'-shanter, tammy\ntambour\ntambour, embroidery_frame, embroidery_hoop\ntambourine\ntammy\ntamp, tamper, tamping_bar\nTampax\ntampion, tompion\ntampon\ntandoor\ntangram\ntank, storage_tank\ntank, army_tank, armored_combat_vehicle, armoured_combat_vehicle\ntankard\ntank_car, tank\ntank_destroyer\ntank_engine, tank_locomotive\ntanker_plane\ntank_shell\ntank_top\ntannoy\ntap, spigot\ntapa, tappa\ntape, tape_recording, taping\ntape, tapeline, tape_measure\ntape_deck\ntape_drive, tape_transport, transport\ntape_player\ntape_recorder, tape_machine\ntaper_file\ntapestry, tapis\ntappet\ntap_wrench\ntare\ntarget, butt\ntarget_acquisition_system\ntarmacadam, tarmac, macadam\ntarpaulin, tarp\ntartan, plaid\ntasset, tasse\ntattoo\ntavern, 
tap_house\ntawse\ntaximeter\nT-bar_lift, T-bar, Alpine_lift\ntea_bag\ntea_ball\ntea_cart, teacart, tea_trolley, tea_wagon\ntea_chest\nteaching_aid\nteacup\ntea_gown\nteakettle\ntea_maker\nteapot\nteashop, teahouse, tearoom, tea_parlor, tea_parlour\nteaspoon\ntea-strainer\ntea_table\ntea_tray\ntea_urn\ntee, golf_tee\ntee_hinge, T_hinge\ntelecom_hotel, telco_building\ntelecommunication_system, telecom_system, telecommunication_equipment, telecom_equipment\ntelegraph, telegraphy\ntelegraph_key\ntelemeter\ntelephone, phone, telephone_set\ntelephone_bell\ntelephone_booth, phone_booth, call_box, telephone_box, telephone_kiosk\ntelephone_cord, phone_cord\ntelephone_jack, phone_jack\ntelephone_line, phone_line, telephone_circuit, subscriber_line, line\ntelephone_plug, phone_plug\ntelephone_pole, telegraph_pole, telegraph_post\ntelephone_receiver, receiver\ntelephone_system, phone_system\ntelephone_wire, telephone_line, telegraph_wire, telegraph_line\ntelephoto_lens, zoom_lens\nTeleprompter\ntelescope, scope\ntelescopic_sight, telescope_sight\ntelethermometer\nteletypewriter, teleprinter, teletype_machine, telex, telex_machine\ntelevision, television_system\ntelevision_antenna, tv-antenna\ntelevision_camera, tv_camera, camera\ntelevision_equipment, video_equipment\ntelevision_monitor, tv_monitor\ntelevision_receiver, television, television_set, tv, tv_set, idiot_box, boob_tube, telly, goggle_box\ntelevision_room, tv_room\ntelevision_transmitter\ntelpher, telfer\ntelpherage, telferage\ntempera, poster_paint, poster_color, poster_colour\ntemple\ntemple\ntemporary_hookup, patch\ntender, supply_ship\ntender, ship's_boat, pinnace, cutter\ntender\ntenement, tenement_house\ntennis_ball\ntennis_camp\ntennis_racket, tennis_racquet\ntenon\ntenor_drum, tom-tom\ntenoroon\ntenpenny_nail\ntenpin\ntensimeter\ntensiometer\ntensiometer\ntensiometer\ntent, collapsible_shelter\ntenter\ntenterhook\ntent-fly, rainfly, fly_sheet, fly, tent_flap\ntent_peg\ntepee, tipi, teepee\nterminal, 
pole\nterminal\nterraced_house\nterra_cotta\nterrarium\nterra_sigillata, Samian_ware\nterry, terry_cloth, terrycloth\nTesla_coil\ntessera\ntest_equipment\ntest_rocket, research_rocket, test_instrument_vehicle\ntest_room, testing_room\ntestudo\ntetraskelion, tetraskele\ntetrode\ntextile_machine\ntextile_mill\nthatch, thatched_roof\ntheater, theatre, house\ntheater_curtain, theatre_curtain\ntheater_light\ntheodolite, transit\ntheremin\nthermal_printer\nthermal_reactor\nthermocouple, thermocouple_junction\nthermoelectric_thermometer, thermel, electric_thermometer\nthermograph, thermometrograph\nthermograph\nthermohydrometer, thermogravimeter\nthermojunction\nthermometer\nthermonuclear_reactor, fusion_reactor\nthermopile\nthermos, thermos_bottle, thermos_flask\nthermostat, thermoregulator\nthigh_pad\nthill\nthimble\nthinning_shears\nthird_base, third\nthird_gear, third\nthird_rail\nthong\nthong\nthree-centered_arch, basket-handle_arch\nthree-decker\nthree-dimensional_radar, 3d_radar\nthree-piece_suit\nthree-quarter_binding\nthree-way_switch, three-point_switch\nthresher, thrasher, threshing_machine\nthreshing_floor\nthriftshop, second-hand_store\nthroat_protector\nthrone\nthrust_bearing\nthruster\nthumb\nthumbhole\nthumbscrew\nthumbstall\nthumbtack, drawing_pin, pushpin\nthunderer\nthwart, cross_thwart\ntiara\nticking\ntickler_coil\ntie, tie_beam\ntie, railroad_tie, crosstie, sleeper\ntie_rack\ntie_rod\ntights, leotards\ntile\ntile_cutter\ntile_roof\ntiller\ntilter\ntilt-top_table, tip-top_table, tip_table\ntimber\ntimber\ntimber_hitch\ntimbrel\ntime_bomb, infernal_machine\ntime_capsule\ntime_clock\ntime-delay_measuring_instrument, time-delay_measuring_system\ntime-fuse\ntimepiece, timekeeper, horologe\ntimer\ntimer\ntime-switch\ntin\ntinderbox\ntine\ntinfoil, tin_foil\ntippet\ntire_chain, snow_chain\ntire_iron, tire_tool\ntitfer\ntithe_barn\ntitrator\ntoaster\ntoaster_oven\ntoasting_fork\ntoastrack\ntobacco_pouch\ntobacco_shop, tobacconist_shop, 
tobacconist\ntoboggan\ntoby, toby_jug, toby_fillpot_jug\ntocsin, warning_bell\ntoe\ntoecap\ntoehold\ntoga\ntoga_virilis\ntoggle\ntoggle_bolt\ntoggle_joint\ntoggle_switch, toggle, on-off_switch, on/off_switch\ntogs, threads, duds\ntoilet, lavatory, lav, can, john, privy, bathroom\ntoilet_bag, sponge_bag\ntoilet_bowl\ntoilet_kit, travel_kit\ntoilet_powder, bath_powder, dusting_powder\ntoiletry, toilet_articles\ntoilet_seat\ntoilet_water, eau_de_toilette\ntokamak\ntoken\ntollbooth, tolbooth, tollhouse\ntoll_bridge\ntollgate, tollbar\ntoll_line\ntomahawk, hatchet\nTommy_gun, Thompson_submachine_gun\ntomograph\ntone_arm, pickup, pickup_arm\ntoner\ntongs, pair_of_tongs\ntongue\ntongue_and_groove_joint\ntongue_depressor\ntonometer\ntool\ntool_bag\ntoolbox, tool_chest, tool_cabinet, tool_case\ntoolshed, toolhouse\ntooth\ntooth\ntoothbrush\ntoothpick\ntop\ntop, cover\ntopgallant, topgallant_mast\ntopgallant, topgallant_sail\ntopiary\ntopknot\ntopmast\ntopper\ntopsail\ntoque\ntorch\ntorpedo\ntorpedo\ntorpedo\ntorpedo_boat\ntorpedo-boat_destroyer\ntorpedo_tube\ntorque_converter\ntorque_wrench\ntorture_chamber\ntotem_pole\ntouch_screen, touchscreen\ntoupee, toupe\ntouring_car, phaeton, tourer\ntourist_class, third_class\ntowel\ntoweling, towelling\ntowel_rack, towel_horse\ntowel_rail, towel_bar\ntower\ntown_hall\ntowpath, towing_path\ntow_truck, tow_car, wrecker\ntoy\ntoy_box, toy_chest\ntoyshop\ntrace_detector\ntrack, rail, rails, runway\ntrack\ntrackball\ntracked_vehicle\ntract_house\ntract_housing\ntraction_engine\ntractor\ntractor\ntrail_bike, dirt_bike, scrambler\ntrailer, house_trailer\ntrailer\ntrailer_camp, trailer_park\ntrailer_truck, tractor_trailer, trucking_rig, rig, articulated_lorry, semi\ntrailing_edge\ntrain, railroad_train\ntramline, tramway, streetcar_track\ntrammel\ntrampoline\ntramp_steamer, tramp\ntramway, tram, aerial_tramway, cable_tramway, ropeway\ntransdermal_patch, skin_patch\ntransept\ntransformer\ntransistor, junction_transistor, 
electronic_transistor\ntransit_instrument\ntransmission, transmission_system\ntransmission_shaft\ntransmitter, sender\ntransom, traverse\ntransom, transom_window, fanlight\ntransponder\ntransporter\ntransporter, car_transporter\ntransport_ship\ntrap\ntrap_door\ntrapeze\ntrave, traverse, crossbeam, crosspiece\ntravel_iron\ntrawl, dragnet, trawl_net\ntrawl, trawl_line, spiller, setline, trotline\ntrawler, dragger\ntray\ntray_cloth\ntread\ntread\ntreadmill, treadwheel, tread-wheel\ntreadmill\ntreasure_chest\ntreasure_ship\ntreenail, trenail, trunnel\ntrefoil_arch\ntrellis, treillage\ntrench\ntrench_coat\ntrench_knife\ntrepan\ntrepan, trephine\ntrestle\ntrestle\ntrestle_bridge\ntrestle_table\ntrestlework\ntrews\ntrial_balloon\ntriangle\ntriangle\ntriclinium\ntriclinium\ntricorn, tricorne\ntricot\ntricycle, trike, velocipede\ntrident\ntrigger\ntrimaran\ntrimmer\ntrimmer_arch\ntriode\ntripod\ntriptych\ntrip_wire\ntrireme\ntriskelion, triskele\ntriumphal_arch\ntrivet\ntrivet\ntroika\ntroll\ntrolleybus, trolley_coach, trackless_trolley\ntrombone\ntroop_carrier, troop_transport\ntroopship\ntrophy_case\ntrough\ntrouser\ntrouser_cuff\ntrouser_press, pants_presser\ntrouser, pant\ntrousseau\ntrowel\ntruck, motortruck\ntrumpet_arch\ntruncheon, nightstick, baton, billy, billystick, billy_club\ntrundle_bed, trundle, truckle_bed, truckle\ntrunk\ntrunk_hose\ntrunk_lid\ntrunk_line\ntruss\ntruss_bridge\ntry_square\nT-square\ntub, vat\ntube, vacuum_tube, thermionic_vacuum_tube, thermionic_tube, electron_tube, thermionic_valve\ntuck_box\ntucker\ntucker-bag\ntuck_shop\nTudor_arch, four-centered_arch\ntudung\ntugboat, tug, towboat, tower\ntulle\ntumble-dryer, tumble_drier\ntumbler\ntumbrel, tumbril\ntun\ntunic\ntuning_fork\ntupik, tupek, sealskin_tent\nturban\nturbine\nturbogenerator\ntureen\nTurkish_bath\nTurkish_towel, terry_towel\nTurk's_head\nturnbuckle\nturner, food_turner\nturnery\nturnpike\nturnspit\nturnstile\nturntable\nturntable, lazy_Susan\nturret\nturret_clock\nturtleneck, 
turtle, polo-neck\ntweed\ntweeter\ntwenty-two, .22\ntwenty-two_pistol\ntwenty-two_rifle\ntwill\ntwill, twill_weave\ntwin_bed\ntwinjet\ntwist_bit, twist_drill\ntwo-by-four\ntwo-man_tent\ntwo-piece, two-piece_suit, lounge_suit\ntypesetting_machine\ntypewriter\ntypewriter_carriage\ntypewriter_keyboard\ntyrolean, tirolean\nuke, ukulele\nulster\nultracentrifuge\nultramicroscope, dark-field_microscope\nUltrasuede\nultraviolet_lamp, ultraviolet_source\numbrella\numbrella_tent\nundercarriage\nundercoat, underseal\nundergarment, unmentionable\nunderpants\nunderwear, underclothes, underclothing\nundies\nuneven_parallel_bars, uneven_bars\nunicycle, monocycle\nuniform\nuniversal_joint, universal\nuniversity\nupholstery\nupholstery_material\nupholstery_needle\nuplift\nupper_berth, upper\nupright, upright_piano\nupset, swage\nupstairs\nurceole\nurn\nurn\nused-car, secondhand_car\nutensil\nUzi\nvacation_home\nvacuum, vacuum_cleaner\nvacuum_chamber\nvacuum_flask, vacuum_bottle\nvacuum_gauge, vacuum_gage\nValenciennes, Valenciennes_lace\nvalise\nvalve\nvalve\nvalve-in-head_engine\nvambrace, lower_cannon\nvan\nvan, caravan\nvane\nvaporizer, vaporiser\nvariable-pitch_propeller\nvariometer\nvarnish\nvase\nvault\nvault, bank_vault\nvaulting_horse, long_horse, buck\nvehicle\nVelcro\nvelocipede\nvelour, velours\nvelvet\nvelveteen\nvending_machine\nveneer, veneering\nVenetian_blind\nVenn_diagram, Venn's_diagram\nventilation, ventilation_system, ventilating_system\nventilation_shaft\nventilator\nveranda, verandah, gallery\nverdigris\nvernier_caliper, vernier_micrometer\nvernier_scale, vernier\nvertical_file\nvertical_stabilizer, vertical_stabiliser, vertical_fin, tail_fin, tailfin\nvertical_tail\nVery_pistol, Verey_pistol\nvessel, watercraft\nvessel\nvest, waistcoat\nvestiture\nvestment\nvest_pocket\nvestry, sacristy\nviaduct\nvibraphone, vibraharp, vibes\nvibrator\nvibrator\nVictrola\nvicuna\nvideocassette\nvideocassette_recorder, VCR\nvideodisk, videodisc, DVD\nvideo_recording, 
video\nvideotape\nvideotape\nvigil_light, vigil_candle\nvilla\nvilla\nvilla\nviol\nviola\nviola_da_braccio\nviola_da_gamba, gamba, bass_viol\nviola_d'amore\nviolin, fiddle\nvirginal, pair_of_virginals\nviscometer, viscosimeter\nviscose_rayon, viscose\nvise, bench_vise\nvisor, vizor\nvisual_display_unit, VDU\nvivarium\nViyella\nvoile\nvolleyball\nvolleyball_net\nvoltage_regulator\nvoltaic_cell, galvanic_cell, primary_cell\nvoltaic_pile, pile, galvanic_pile\nvoltmeter\nvomitory\nvon_Neumann_machine\nvoting_booth\nvoting_machine\nvoussoir\nvox_angelica, voix_celeste\nvox_humana\nwaders\nwading_pool\nwaffle_iron\nwagon, waggon\nwagon, coaster_wagon\nwagon_tire\nwagon_wheel\nwain\nwainscot, wainscoting, wainscotting\nwainscoting, wainscotting\nwaist_pack, belt_bag\nwalker, baby-walker, go-cart\nwalker, Zimmer, Zimmer_frame\nwalker\nwalkie-talkie, walky-talky\nwalk-in\nwalking_shoe\nwalking_stick\nWalkman\nwalk-up_apartment, walk-up\nwall\nwall\nwall_clock\nwallet, billfold, notecase, pocketbook\nwall_tent\nwall_unit\nwand\nWankel_engine, Wankel_rotary_engine, epitrochoidal_engine\nward, hospital_ward\nwardrobe, closet, press\nwardroom\nwarehouse, storage_warehouse\nwarming_pan\nwar_paint\nwarplane, military_plane\nwar_room\nwarship, war_vessel, combat_ship\nwash\nwash-and-wear\nwashbasin, handbasin, washbowl, lavabo, wash-hand_basin\nwashboard, splashboard\nwashboard\nwasher, automatic_washer, washing_machine\nwasher\nwashhouse\nwashroom\nwashstand, wash-hand_stand\nwashtub\nwastepaper_basket, waste-paper_basket, wastebasket, waste_basket, circular_file\nwatch, ticker\nwatch_cap\nwatch_case\nwatch_glass\nwatchtower\nwater-base_paint\nwater_bed\nwater_bottle\nwater_butt\nwater_cart\nwater_chute\nwater_closet, closet, W.C., loo\nwatercolor, water-color, watercolour, water-colour\nwater-cooled_reactor\nwater_cooler\nwater_faucet, water_tap, tap, hydrant\nwater_filter\nwater_gauge, water_gage, water_glass\nwater_glass\nwater_hazard\nwater_heater, hot-water_heater, 
hot-water_tank\nwatering_can, watering_pot\nwatering_cart\nwater_jacket\nwater_jug\nwater_jump\nwater_level\nwater_meter\nwater_mill\nwaterproof\nwaterproofing\nwater_pump\nwater_scooter, sea_scooter, scooter\nwater_ski\nwaterspout\nwater_tower\nwater_wagon, water_waggon\nwaterwheel, water_wheel\nwaterwheel, water_wheel\nwater_wings\nwaterworks\nwattmeter\nwaxwork, wax_figure\nways, shipway, slipway\nweapon, arm, weapon_system\nweaponry, arms, implements_of_war, weapons_system, munition\nweapons_carrier\nweathercock\nweatherglass\nweather_satellite, meteorological_satellite\nweather_ship\nweathervane, weather_vane, vane, wind_vane\nweb, entanglement\nweb\nwebbing\nwebcam\nwedge\nwedge\nwedgie\nWedgwood\nweeder, weed-whacker\nweeds, widow's_weeds\nweekender\nweighbridge\nweight, free_weight, exercising_weight\nweir\nweir\nwelcome_wagon\nweld\nwelder's_mask\nweldment\nwell\nwellhead\nwelt\nWeston_cell, cadmium_cell\nwet_bar\nwet-bulb_thermometer\nwet_cell\nwet_fly\nwet_suit\nwhaleboat\nwhaler, whaling_ship\nwhaling_gun\nwheel\nwheel\nwheel_and_axle\nwheelchair\nwheeled_vehicle\nwheelwork\nwherry\nwherry, Norfolk_wherry\nwhetstone\nwhiffletree, whippletree, swingletree\nwhip\nwhipcord\nwhipping_post\nwhipstitch, whipping, whipstitching\nwhirler\nwhisk, whisk_broom\nwhisk\nwhiskey_bottle\nwhiskey_jug\nwhispering_gallery, whispering_dome\nwhistle\nwhistle\nwhite\nwhite_goods\nwhitewash\nwhorehouse, brothel, bordello, bagnio, house_of_prostitution, house_of_ill_repute, bawdyhouse, cathouse, sporting_house\nwick, taper\nwicker, wickerwork, caning\nwicker_basket\nwicket, hoop\nwicket\nwickiup, wikiup\nwide-angle_lens, fisheye_lens\nwidebody_aircraft, wide-body_aircraft, wide-body, twin-aisle_airplane\nwide_wale\nwidow's_walk\nWiffle, Wiffle_Ball\nwig\nwigwam\nWilton, Wilton_carpet\nwimple\nwincey\nwinceyette\nwinch, windlass\nWinchester\nwindbreak, shelterbelt\nwinder, key\nwind_instrument, wind\nwindjammer\nwindmill, aerogenerator, 
wind_generator\nwindmill\nwindow\nwindow\nwindow_blind\nwindow_box\nwindow_envelope\nwindow_frame\nwindow_screen\nwindow_seat\nwindow_shade\nwindowsill\nwindshield, windscreen\nwindshield_wiper, windscreen_wiper, wiper, wiper_blade\nWindsor_chair\nWindsor_knot\nWindsor_tie\nwind_tee\nwind_tunnel\nwind_turbine\nwine_bar\nwine_bottle\nwine_bucket, wine_cooler\nwine_cask, wine_barrel\nwineglass\nwinepress\nwinery, wine_maker\nwineskin\nwing\nwing_chair\nwing_nut, wing-nut, wing_screw, butterfly_nut, thumbnut\nwing_tip\nwing_tip\nwinker, blinker, blinder\nwiper, wiper_arm, contact_arm\nwiper_motor\nwire\nwire, conducting_wire\nwire_cloth\nwire_cutter\nwire_gauge, wire_gage\nwireless_local_area_network, WLAN, wireless_fidelity, WiFi\nwire_matrix_printer, wire_printer, stylus_printer\nwire_recorder\nwire_stripper\nwirework, grillwork\nwiring\nwishing_cap\nwitness_box, witness_stand\nwok\nwoman's_clothing\nwood\nwoodcarving\nwood_chisel\nwoodenware\nwooden_spoon\nwoodscrew\nwoodshed\nwood_vise, woodworking_vise, shoulder_vise\nwoodwind, woodwind_instrument, wood\nwoof, weft, filling, pick\nwoofer\nwool, woolen, woollen\nworkbasket, workbox, workbag\nworkbench, work_bench, bench\nwork-clothing, work-clothes\nworkhouse\nworkhouse\nworkpiece\nworkroom\nworks, workings\nwork-shirt\nworkstation\nworktable, work_table\nworkwear\nWorld_Wide_Web, WWW, web\nworm_fence, snake_fence, snake-rail_fence, Virginia_fence\nworm_gear\nworm_wheel\nworsted\nworsted, worsted_yarn\nwrap, wrapper\nwraparound\nwrapping, wrap, wrapper\nwreck\nwrench, spanner\nwrestling_mat\nwringer\nwrist_pad\nwrist_pin, gudgeon_pin\nwristwatch, wrist_watch\nwriting_arm\nwriting_desk\nwriting_desk\nwriting_implement\nxerographic_printer\nXerox, xerographic_copier, Xerox_machine\nX-ray_film\nX-ray_machine\nX-ray_tube\nyacht, racing_yacht\nyacht_chair\nyagi, Yagi_aerial\nyard\nyard\nyardarm\nyard_marker\nyardstick, yard_measure\nyarmulke, yarmulka, yarmelke\nyashmak, yashmac\nyataghan\nyawl, 
dandy\nyawl\nyoke\nyoke\nyoke, coupling\nyurt\nZamboni\nzero\nziggurat, zikkurat, zikurat\nzill\nzip_gun\nzither, cither, zithern\nzoot_suit\nshading\ngrain\nwood_grain, woodgrain, woodiness\ngraining, woodgraining\nmarbleization, marbleisation, marbleizing, marbleising\nlight, lightness\naura, aureole, halo, nimbus, glory, gloriole\nsunniness\nglint\nopalescence, iridescence\npolish, gloss, glossiness, burnish\nprimary_color_for_pigments, primary_colour_for_pigments\nprimary_color_for_light, primary_colour_for_light\ncolorlessness, colourlessness, achromatism, achromaticity\nmottle\nachromia\nshade, tint, tincture, tone\nchromatic_color, chromatic_colour, spectral_color, spectral_colour\nblack, blackness, inkiness\ncoal_black, ebony, jet_black, pitch_black, sable, soot_black\nalabaster\nbone, ivory, pearl, off-white\ngray, grayness, grey, greyness\nash_grey, ash_gray, silver, silver_grey, silver_gray\ncharcoal, charcoal_grey, charcoal_gray, oxford_grey, oxford_gray\nsanguine\nTurkey_red, alizarine_red\ncrimson, ruby, deep_red\ndark_red\nclaret\nfuschia\nmaroon\norange, orangeness\nreddish_orange\nyellow, yellowness\ngamboge, lemon, lemon_yellow, maize\npale_yellow, straw, wheat\ngreen, greenness, viridity\ngreenishness\nsea_green\nsage_green\nbottle_green\nemerald\nolive_green, olive-green\njade_green, jade\nblue, blueness\nazure, cerulean, sapphire, lazuline, sky-blue\nsteel_blue\ngreenish_blue, aqua, aquamarine, turquoise, cobalt_blue, peacock_blue\npurplish_blue, royal_blue\npurple, purpleness\nTyrian_purple\nindigo\nlavender\nreddish_purple, royal_purple\npink\ncarnation\nrose, rosiness\nchestnut\nchocolate, coffee, deep_brown, umber, burnt_umber\nlight_brown\ntan, topaz\nbeige, ecru\nreddish_brown, sepia, burnt_sienna, Venetian_red, mahogany\nbrick_red\ncopper, copper_color\nIndian_red\npuce\nolive\nultramarine\ncomplementary_color, complementary\npigmentation\ncomplexion, skin_color, skin_colour\nruddiness, rosiness\nnonsolid_color, nonsolid_colour, 
dithered_color, dithered_colour\naposematic_coloration, warning_coloration\ncryptic_coloration\nring\ncenter_of_curvature, centre_of_curvature\ncadaver, corpse, stiff, clay, remains\nmandibular_notch\nrib\nskin, tegument, cutis\nskin_graft\nepidermal_cell\nmelanocyte\nprickle_cell\ncolumnar_cell, columnar_epithelial_cell\nspongioblast\nsquamous_cell\namyloid_plaque, amyloid_protein_plaque\ndental_plaque, bacterial_plaque\nmacule, macula\nfreckle, lentigo\nbouffant\nsausage_curl\nforelock\nspit_curl, kiss_curl\npigtail\npageboy\npompadour\nthatch\nsoup-strainer, toothbrush\nmustachio, moustachio, handle-bars\nwalrus_mustache, walrus_moustache\nstubble\nvandyke_beard, vandyke\nsoul_patch, Attilio\nesophageal_smear\nparaduodenal_smear, duodenal_smear\nspecimen\npunctum\nglenoid_fossa, glenoid_cavity\ndiastema\nmarrow, bone_marrow\nmouth, oral_cavity, oral_fissure, rima_oris\ncanthus\nmilk\nmother's_milk\ncolostrum, foremilk\nvein, vena, venous_blood_vessel\nganglion_cell, gangliocyte\nX_chromosome\nembryonic_cell, formative_cell\nmyeloblast\nsideroblast\nosteocyte\nmegalocyte, macrocyte\nleukocyte, leucocyte, white_blood_cell, white_cell, white_blood_corpuscle, white_corpuscle, WBC\nhistiocyte\nfixed_phagocyte\nlymphocyte, lymph_cell\nmonoblast\nneutrophil, neutrophile\nmicrophage\nsickle_cell\nsiderocyte\nspherocyte\nootid\noocyte\nspermatid\nLeydig_cell, Leydig's_cell\nstriated_muscle_cell, striated_muscle_fiber\nsmooth_muscle_cell\nRanvier's_nodes, nodes_of_Ranvier\nneuroglia, glia\nastrocyte\nprotoplasmic_astrocyte\noligodendrocyte\nproprioceptor\ndendrite\nsensory_fiber, afferent_fiber\nsubarachnoid_space\ncerebral_cortex, cerebral_mantle, pallium, cortex\nrenal_cortex\nprepuce, foreskin\nhead, caput\nscalp\nfrontal_eminence\nsuture, sutura, fibrous_joint\nforamen_magnum\nesophagogastric_junction, oesophagogastric_junction\nheel\ncuticle\nhangnail, agnail\nexoskeleton\nabdominal_wall\nlemon\ncoordinate_axis\nlandscape\nmedium\nvehicle\npaper\nchannel, 
transmission_channel\nfilm, cinema, celluloid\nsilver_screen\nfree_press\npress, public_press\nprint_media\nstorage_medium, data-storage_medium\nmagnetic_storage_medium, magnetic_medium, magnetic_storage\njournalism, news_media\nFleet_Street\nphotojournalism\nnews_photography\nrotogravure\nnewspaper, paper\ndaily\ngazette\nschool_newspaper, school_paper\ntabloid, rag, sheet\nyellow_journalism, tabloid, tab\ntelecommunication, telecom\ntelephone, telephony\nvoice_mail, voicemail\ncall, phone_call, telephone_call\ncall-back\ncollect_call\ncall_forwarding\ncall-in\ncall_waiting\ncrank_call\nlocal_call\nlong_distance, long-distance_call, trunk_call\ntoll_call\nwake-up_call\nthree-way_calling\ntelegraphy\ncable, cablegram, overseas_telegram\nwireless\nradiotelegraph, radiotelegraphy, wireless_telegraphy\nradiotelephone, radiotelephony, wireless_telephone\nbroadcasting\nRediffusion\nmultiplex\nradio, radiocommunication, wireless\ntelevision, telecasting, TV, video\ncable_television, cable\nhigh-definition_television, HDTV\nreception\nsignal_detection, detection\nHakham\nweb_site, website, internet_site, site\nchat_room, chatroom\nportal_site, portal\njotter\nbreviary\nwordbook\ndesk_dictionary, collegiate_dictionary\nreckoner, ready_reckoner\ndocument, written_document, papers\nalbum, record_album\nconcept_album\nrock_opera\ntribute_album, benefit_album\nmagazine, mag\ncolour_supplement\ncomic_book\nnews_magazine\npulp, pulp_magazine\nslick, slick_magazine, glossy\ntrade_magazine\nmovie, film, picture, moving_picture, moving-picture_show, motion_picture, motion-picture_show, picture_show, pic, flick\nouttake\nshoot-'em-up\nspaghetti_Western\nencyclical, encyclical_letter\ncrossword_puzzle, crossword\nsign\nstreet_sign\ntraffic_light, traffic_signal, stoplight\nswastika, Hakenkreuz\nconcert\nartwork, art, graphics, nontextual_matter\nlobe\nbook_jacket, dust_cover, dust_jacket, dust_wrapper\ncairn\nthree-day_event\ncomfort_food\ncomestible, edible, eatable, pabulum, 
victual, victuals\ntuck\ncourse\ndainty, delicacy, goody, kickshaw, treat\ndish\nfast_food\nfinger_food\ningesta\nkosher\nfare\ndiet\ndiet\ndietary\nbalanced_diet\nbland_diet, ulcer_diet\nclear_liquid_diet\ndiabetic_diet\ndietary_supplement\ncarbohydrate_loading, carbo_loading\nfad_diet\ngluten-free_diet\nhigh-protein_diet\nhigh-vitamin_diet, vitamin-deficiency_diet\nlight_diet\nliquid_diet\nlow-calorie_diet\nlow-fat_diet\nlow-sodium_diet, low-salt_diet, salt-free_diet\nmacrobiotic_diet\nreducing_diet, obesity_diet\nsoft_diet, pap, spoon_food\nvegetarianism\nmenu\nchow, chuck, eats, grub\nboard, table\nmess\nration\nfield_ration\nK_ration\nC-ration\nfoodstuff, food_product\nstarches\nbreadstuff\ncoloring, colouring, food_coloring, food_colouring, food_color, food_colour\nconcentrate\ntomato_concentrate\nmeal\nkibble\ncornmeal, Indian_meal\nfarina\nmatzo_meal, matzoh_meal, matzah_meal\noatmeal, rolled_oats\npea_flour\nroughage, fiber\nbran\nflour\nplain_flour\nwheat_flour\nwhole_wheat_flour, graham_flour, graham, whole_meal_flour\nsoybean_meal, soybean_flour, soy_flour\nsemolina\ncorn_gluten_feed\nnutriment, nourishment, nutrition, sustenance, aliment, alimentation, victuals\ncommissariat, provisions, provender, viands, victuals\nlarder\nfrozen_food, frozen_foods\ncanned_food, canned_foods, canned_goods, tinned_goods\ncanned_meat, tinned_meat\nSpam\ndehydrated_food, dehydrated_foods\nsquare_meal\nmeal, repast\npotluck\nrefection\nrefreshment\nbreakfast\ncontinental_breakfast, petit_dejeuner\nbrunch\nlunch, luncheon, tiffin, dejeuner\nbusiness_lunch\nhigh_tea\ntea, afternoon_tea, teatime\ndinner\nsupper\nbuffet\npicnic\ncookout\nbarbecue, barbeque\nclambake\nfish_fry\nbite, collation, snack\nnosh\nnosh-up\nploughman's_lunch\ncoffee_break, tea_break\nbanquet, feast, spread\nentree, main_course\npiece_de_resistance\nplate\nadobo\nside_dish, side_order, entremets\nspecial\ncasserole\nchicken_casserole\nchicken_cacciatore, chicken_cacciatora, 
hunter's_chicken\nantipasto\nappetizer, appetiser, starter\ncanape\ncocktail\nfruit_cocktail\ncrab_cocktail\nshrimp_cocktail\nhors_d'oeuvre\nrelish\ndip\nbean_dip\ncheese_dip\nclam_dip\nguacamole\nsoup\nsoup_du_jour\nalphabet_soup\nconsomme\nmadrilene\nbisque\nborsch, borsh, borscht, borsht, borshch, bortsch\nbroth\nbarley_water\nbouillon\nbeef_broth, beef_stock\nchicken_broth, chicken_stock\nbroth, stock\nstock_cube\nchicken_soup\ncock-a-leekie, cocky-leeky\ngazpacho\ngumbo\njulienne\nmarmite\nmock_turtle_soup\nmulligatawny\noxtail_soup\npea_soup\npepper_pot, Philadelphia_pepper_pot\npetite_marmite, minestrone, vegetable_soup\npotage, pottage\npottage\nturtle_soup, green_turtle_soup\neggdrop_soup\nchowder\ncorn_chowder\nclam_chowder\nManhattan_clam_chowder\nNew_England_clam_chowder\nfish_chowder\nwon_ton, wonton, wonton_soup\nsplit-pea_soup\ngreen_pea_soup, potage_St._Germain\nlentil_soup\nScotch_broth\nvichyssoise\nstew\nbigos\nBrunswick_stew\nburgoo\nburgoo\nolla_podrida, Spanish_burgoo\nmulligan_stew, mulligan, Irish_burgoo\npurloo, chicken_purloo, poilu\ngoulash, Hungarian_goulash, gulyas\nhotchpotch\nhot_pot, hotpot\nbeef_goulash\npork-and-veal_goulash\nporkholt\nIrish_stew\noyster_stew\nlobster_stew\nlobscouse, lobscuse, scouse\nfish_stew\nbouillabaisse\nmatelote\npaella\nfricassee\nchicken_stew\nturkey_stew\nbeef_stew\nragout\nratatouille\nsalmi\npot-au-feu\nslumgullion\nsmorgasbord\nviand\nready-mix\nbrownie_mix\ncake_mix\nlemonade_mix\nself-rising_flour, self-raising_flour\nchoice_morsel, tidbit, titbit\nsavory, savoury\ncalf's-foot_jelly\ncaramel, caramelized_sugar\nlump_sugar\ncane_sugar\ncastor_sugar, caster_sugar\npowdered_sugar\ngranulated_sugar\nicing_sugar\ncorn_sugar\nbrown_sugar\ndemerara, demerara_sugar\nsweet, confection\nconfectionery\nconfiture\nsweetmeat\ncandy, confect\ncandy_bar\ncarob_bar\nhardbake\nhard_candy\nbarley-sugar, barley_candy\nbrandyball\njawbreaker\nlemon_drop\nsourball\npatty\npeppermint_patty\nbonbon\nbrittle, toffee, 
toffy\npeanut_brittle\nchewing_gum, gum\ngum_ball\nbubble_gum\nbutterscotch\ncandied_fruit, succade, crystallized_fruit\ncandied_apple, candy_apple, taffy_apple, caramel_apple, toffee_apple\ncrystallized_ginger\ngrapefruit_peel\nlemon_peel\norange_peel\ncandied_citrus_peel\ncandy_cane\ncandy_corn\ncaramel\ncenter, centre\ncomfit\ncotton_candy, spun_sugar, candyfloss\ndragee\ndragee\nfondant\nfudge\nchocolate_fudge\ndivinity, divinity_fudge\npenuche, penoche, panoche, panocha\ngumdrop\njujube\nhoney_crisp\nmint, mint_candy\nhorehound\npeppermint, peppermint_candy\njelly_bean, jelly_egg\nkiss, candy_kiss\nmolasses_kiss\nmeringue_kiss\nchocolate_kiss\nlicorice, liquorice\nLife_Saver\nlollipop, sucker, all-day_sucker\nlozenge\ncachou\ncough_drop, troche, pastille, pastil\nmarshmallow\nmarzipan, marchpane\nnougat\nnougat_bar\nnut_bar\npeanut_bar\npopcorn_ball\npraline\nrock_candy\nrock_candy, rock\nsugar_candy\nsugarplum\ntaffy\nmolasses_taffy\ntruffle, chocolate_truffle\nTurkish_Delight\ndessert, sweet, afters\nambrosia, nectar\nambrosia\nbaked_Alaska\nblancmange\ncharlotte\ncompote, fruit_compote\ndumpling\nflan\nfrozen_dessert\njunket\nmousse\nmousse\npavlova\npeach_melba\nwhip\nprune_whip\npudding\npudding, pud\nsyllabub, sillabub\ntiramisu\ntrifle\ntipsy_cake\njello, Jell-O\napple_dumpling\nice, frappe\nwater_ice, sorbet\nice_cream, icecream\nice-cream_cone\nchocolate_ice_cream\nNeapolitan_ice_cream\npeach_ice_cream\nsherbert, sherbet\nstrawberry_ice_cream\ntutti-frutti\nvanilla_ice_cream\nice_lolly, lolly, lollipop, popsicle\nice_milk\nfrozen_yogurt\nsnowball\nsnowball\nparfait\nice-cream_sundae, sundae\nsplit\nbanana_split\nfrozen_pudding\nfrozen_custard, soft_ice_cream\npudding\nflummery\nfish_mousse\nchicken_mousse\nchocolate_mousse\nplum_pudding, Christmas_pudding\ncarrot_pudding\ncorn_pudding\nsteamed_pudding\nduff, plum_duff\nvanilla_pudding\nchocolate_pudding\nbrown_Betty\nNesselrode, 
Nesselrode_pudding\npease_pudding\ncustard\ncreme_caramel\ncreme_anglais\ncreme_brulee\nfruit_custard\ntapioca\ntapioca_pudding\nroly-poly, roly-poly_pudding\nsuet_pudding\nBavarian_cream\nmaraschino, maraschino_cherry\nnonpareil\nzabaglione, sabayon\ngarnish\npastry, pastry_dough\nturnover\napple_turnover\nknish\npirogi, piroshki, pirozhki\nsamosa\ntimbale\npuff_paste, pate_feuillete\nphyllo\npuff_batter, pouf_paste, pate_a_choux\nice-cream_cake, icebox_cake\ndoughnut, donut, sinker\nfish_cake, fish_ball\nfish_stick, fish_finger\nconserve, preserve, conserves, preserves\napple_butter\nchowchow\njam\nlemon_curd, lemon_cheese\nstrawberry_jam, strawberry_preserves\njelly\napple_jelly\ncrabapple_jelly\ngrape_jelly\nmarmalade\norange_marmalade\ngelatin, jelly\ngelatin_dessert\nbuffalo_wing\nbarbecued_wing\nmess\nmince\npuree\nbarbecue, barbeque\nbiryani, biriani\nescalope_de_veau_Orloff\nsaute\npatty, cake\nveal_parmesan, veal_parmigiana\nveal_cordon_bleu\nmargarine, margarin, oleo, oleomargarine, marge\nmincemeat\nstuffing, dressing\nturkey_stuffing\noyster_stuffing, oyster_dressing\nforcemeat, farce\nbread, breadstuff, staff_of_life\nanadama_bread\nbap\nbarmbrack\nbreadstick, bread-stick\ngrissino\nbrown_bread, Boston_brown_bread\nbun, roll\ntea_bread\ncaraway_seed_bread\nchallah, hallah\ncinnamon_bread\ncracked-wheat_bread\ncracker\ncrouton\ndark_bread, whole_wheat_bread, whole_meal_bread, brown_bread\nEnglish_muffin\nflatbread\ngarlic_bread\ngluten_bread\ngraham_bread\nHost\nflatbrod\nbannock\nchapatti, chapati\npita, pocket_bread\nloaf_of_bread, loaf\nFrench_loaf\nmatzo, matzoh, matzah, unleavened_bread\nnan, naan\nonion_bread\nraisin_bread\nquick_bread\nbanana_bread\ndate_bread\ndate-nut_bread\nnut_bread\noatcake\nIrish_soda_bread\nskillet_bread, fry_bread\nrye_bread\nblack_bread, pumpernickel\nJewish_rye_bread, Jewish_rye\nlimpa\nSwedish_rye_bread, Swedish_rye\nsalt-rising_bread\nsimnel\nsour_bread, sourdough_bread\ntoast\nwafer\nwhite_bread, 
light_bread\nbaguet, baguette\nFrench_bread\nItalian_bread\ncornbread\ncorn_cake\nskillet_corn_bread\nashcake, ash_cake, corn_tash\nhoecake\ncornpone, pone\ncorn_dab, corn_dodger, dodger\nhush_puppy, hushpuppy\njohnnycake, johnny_cake, journey_cake\nShawnee_cake\nspoon_bread, batter_bread\ncinnamon_toast\norange_toast\nMelba_toast\nzwieback, rusk, Brussels_biscuit, twice-baked_bread\nfrankfurter_bun, hotdog_bun\nhamburger_bun, hamburger_roll\nmuffin, gem\nbran_muffin\ncorn_muffin\nYorkshire_pudding\npopover\nscone\ndrop_scone, griddlecake, Scotch_pancake\ncross_bun, hot_cross_bun\nbrioche\ncrescent_roll, croissant\nhard_roll, Vienna_roll\nsoft_roll\nkaiser_roll\nParker_House_roll\nclover-leaf_roll\nonion_roll\nbialy, bialystoker\nsweet_roll, coffee_roll\nbear_claw, bear_paw\ncinnamon_roll, cinnamon_bun, cinnamon_snail\nhoney_bun, sticky_bun, caramel_bun, schnecken\npinwheel_roll\ndanish, danish_pastry\nbagel, beigel\nonion_bagel\nbiscuit\nrolled_biscuit\nbaking-powder_biscuit\nbuttermilk_biscuit, soda_biscuit\nshortcake\nhardtack, pilot_biscuit, pilot_bread, sea_biscuit, ship_biscuit\nsaltine\nsoda_cracker\noyster_cracker\nwater_biscuit\ngraham_cracker\npretzel\nsoft_pretzel\nsandwich\nsandwich_plate\nbutty\nham_sandwich\nchicken_sandwich\nclub_sandwich, three-decker, triple-decker\nopen-face_sandwich, open_sandwich\nhamburger, beefburger, burger\ncheeseburger\ntunaburger\nhotdog, hot_dog, red_hot\nSloppy_Joe\nbomber, grinder, hero, hero_sandwich, hoagie, hoagy, Cuban_sandwich, Italian_sandwich, poor_boy, sub, submarine, submarine_sandwich, torpedo, wedge, zep\ngyro\nbacon-lettuce-tomato_sandwich, BLT\nReuben\nwestern, western_sandwich\nwrap\nspaghetti\nhasty_pudding\ngruel\ncongee, jook\nskilly\nedible_fruit\nvegetable, veggie, veg\njulienne, julienne_vegetable\nraw_vegetable, rabbit_food\ncrudites\ncelery_stick\nlegume\npulse\npotherb\ngreens, green, leafy_vegetable\nchop-suey_greens\nbean_curd, tofu\nsolanaceous_vegetable\nroot_vegetable\npotato, white_potato, 
Irish_potato, murphy, spud, tater\nbaked_potato\nfrench_fries, french-fried_potatoes, fries, chips\nhome_fries, home-fried_potatoes\njacket_potato\nmashed_potato\npotato_skin, potato_peel, potato_peelings\nUruguay_potato\nyam\nsweet_potato\nyam\nsnack_food\nchip, crisp, potato_chip, Saratoga_chip\ncorn_chip\ntortilla_chip\nnacho\neggplant, aubergine, mad_apple\npieplant, rhubarb\ncruciferous_vegetable\nmustard, mustard_greens, leaf_mustard, Indian_mustard\ncabbage, chou\nkale, kail, cole\ncollards, collard_greens\nChinese_cabbage, celery_cabbage, Chinese_celery\nbok_choy, bok_choi\nhead_cabbage\nred_cabbage\nsavoy_cabbage, savoy\nbroccoli\ncauliflower\nbrussels_sprouts\nbroccoli_rabe, broccoli_raab\nsquash\nsummer_squash\nyellow_squash\ncrookneck, crookneck_squash, summer_crookneck\nzucchini, courgette\nmarrow, vegetable_marrow\ncocozelle\npattypan_squash\nspaghetti_squash\nwinter_squash\nacorn_squash\nbutternut_squash\nhubbard_squash\nturban_squash\nbuttercup_squash\ncushaw\nwinter_crookneck_squash\ncucumber, cuke\ngherkin\nartichoke, globe_artichoke\nartichoke_heart\nJerusalem_artichoke, sunchoke\nasparagus\nbamboo_shoot\nsprout\nbean_sprout\nalfalfa_sprout\nbeet, beetroot\nbeet_green\nsugar_beet\nmangel-wurzel\nchard, Swiss_chard, spinach_beet, leaf_beet\npepper\nsweet_pepper\nbell_pepper\ngreen_pepper\nglobe_pepper\npimento, pimiento\nhot_pepper\nchili, chili_pepper, chilli, chilly, chile\njalapeno, jalapeno_pepper\nchipotle\ncayenne, cayenne_pepper\ntabasco, red_pepper\nonion\nBermuda_onion\ngreen_onion, spring_onion, scallion\nVidalia_onion\nSpanish_onion\npurple_onion, red_onion\nleek\nshallot\nsalad_green, salad_greens\nlettuce\nbutterhead_lettuce\nbuttercrunch\nBibb_lettuce\nBoston_lettuce\ncrisphead_lettuce, iceberg_lettuce, iceberg\ncos, cos_lettuce, romaine, romaine_lettuce\nleaf_lettuce, loose-leaf_lettuce\nceltuce\nbean, edible_bean\ngoa_bean\nlentil\npea\ngreen_pea, garden_pea\nmarrowfat_pea\nsnow_pea, sugar_pea\nsugar_snap_pea\nsplit-pea\nchickpea, 
garbanzo\ncajan_pea, pigeon_pea, dahl\nfield_pea\nmushy_peas\nblack-eyed_pea, cowpea\ncommon_bean\nkidney_bean\nnavy_bean, pea_bean, white_bean\npinto_bean\nfrijole\nblack_bean, turtle_bean\nfresh_bean\nflageolet, haricot\ngreen_bean\nsnap_bean, snap\nstring_bean\nKentucky_wonder, Kentucky_wonder_bean\nscarlet_runner, scarlet_runner_bean, runner_bean, English_runner_bean\nharicot_vert, haricots_verts, French_bean\nwax_bean, yellow_bean\nshell_bean\nlima_bean\nFordhooks\nsieva_bean, butter_bean, butterbean, civet_bean\nfava_bean, broad_bean\nsoy, soybean, soya, soya_bean\ngreen_soybean\nfield_soybean\ncardoon\ncarrot\ncarrot_stick\ncelery\npascal_celery, Paschal_celery\nceleriac, celery_root\nchicory, curly_endive\nradicchio\ncoffee_substitute\nchicory, chicory_root\nPostum\nchicory_escarole, endive, escarole\nBelgian_endive, French_endive, witloof\ncorn, edible_corn\nsweet_corn, green_corn\nhominy\nlye_hominy\npearl_hominy\npopcorn\ncress\nwatercress\ngarden_cress\nwinter_cress\ndandelion_green\ngumbo, okra\nkohlrabi, turnip_cabbage\nlamb's-quarter, pigweed, wild_spinach\nwild_spinach\ntomato\nbeefsteak_tomato\ncherry_tomato\nplum_tomato\ntomatillo, husk_tomato, Mexican_husk_tomato\nmushroom\nstuffed_mushroom\nsalsify\noyster_plant, vegetable_oyster\nscorzonera, black_salsify\nparsnip\npumpkin\nradish\nturnip\nwhite_turnip\nrutabaga, swede, swedish_turnip, yellow_turnip\nturnip_greens\nsorrel, common_sorrel\nFrench_sorrel\nspinach\ntaro, taro_root, cocoyam, dasheen, edda\ntruffle, earthnut\nedible_nut\nbunya_bunya\npeanut, earthnut, goober, goober_pea, groundnut, monkey_nut\nfreestone\ncling, clingstone\nwindfall\napple\ncrab_apple, crabapple\neating_apple, dessert_apple\nBaldwin\nCortland\nCox's_Orange_Pippin\nDelicious\nGolden_Delicious, 
Yellow_Delicious\nRed_Delicious\nEmpire\nGrimes'_golden\nJonathan\nMcIntosh\nMacoun\nNorthern_Spy\nPearmain\nPippin\nPrima\nStayman\nWinesap\nStayman_Winesap\ncooking_apple\nBramley's_Seedling\nGranny_Smith\nLane's_Prince_Albert\nNewtown_Wonder\nRome_Beauty\nberry\nbilberry, whortleberry, European_blueberry\nhuckleberry\nblueberry\nwintergreen, boxberry, checkerberry, teaberry, spiceberry\ncranberry\nlingonberry, mountain_cranberry, cowberry, lowbush_cranberry\ncurrant\ngooseberry\nblack_currant\nred_currant\nblackberry\nboysenberry\ndewberry\nloganberry\nraspberry\nsaskatoon, serviceberry, shadberry, juneberry\nstrawberry\nsugarberry, hackberry\npersimmon\nacerola, barbados_cherry, surinam_cherry, West_Indian_cherry\ncarambola, star_fruit\nceriman, monstera\ncarissa_plum, natal_plum\ncitrus, citrus_fruit, citrous_fruit\norange\ntemple_orange\nmandarin, mandarin_orange\nclementine\nsatsuma\ntangerine\ntangelo, ugli, ugli_fruit\nbitter_orange, Seville_orange, sour_orange\nsweet_orange\nJaffa_orange\nnavel_orange\nValencia_orange\nkumquat\nlemon\nlime\nkey_lime\ngrapefruit\npomelo, shaddock\ncitrange\ncitron\nalmond\nJordan_almond\napricot\npeach\nnectarine\npitahaya\nplum\ndamson, damson_plum\ngreengage, greengage_plum\nbeach_plum\nsloe\nVictoria_plum\ndried_fruit\ndried_apricot\nprune\nraisin\nseedless_raisin, sultana\nseeded_raisin\ncurrant\nfig\npineapple, ananas\nanchovy_pear, river_pear\nbanana\npassion_fruit\ngranadilla\nsweet_calabash\nbell_apple, sweet_cup, water_lemon, yellow_granadilla\nbreadfruit\njackfruit, jak, jack\ncacao_bean, cocoa_bean\ncocoa\ncanistel, eggfruit\nmelon\nmelon_ball\nmuskmelon, sweet_melon\ncantaloup, cantaloupe\nwinter_melon\nhoneydew, honeydew_melon\nPersian_melon\nnet_melon, netted_melon, nutmeg_melon\ncasaba, casaba_melon\nwatermelon\ncherry\nsweet_cherry, black_cherry\nbing_cherry\nheart_cherry, oxheart, oxheart_cherry\nblackheart, blackheart_cherry\ncapulin, Mexican_black_cherry\nsour_cherry\namarelle\nmorello\ncocoa_plum, 
coco_plum, icaco\ngherkin\ngrape\nfox_grape\nConcord_grape\nCatawba\nmuscadine, bullace_grape\nscuppernong\nslipskin_grape\nvinifera_grape\nemperor\nmuscat, muscatel, muscat_grape\nribier\nsultana\nTokay\nflame_tokay\nThompson_Seedless\ncustard_apple\ncherimoya, cherimolla\nsoursop, guanabana\nsweetsop, annon, sugar_apple\nilama\npond_apple\npapaw, pawpaw\npapaya\nkai_apple\nketembilla, kitembilla, kitambilla\nackee, akee\ndurian\nfeijoa, pineapple_guava\ngenip, Spanish_lime\ngenipap, genipap_fruit\nkiwi, kiwi_fruit, Chinese_gooseberry\nloquat, Japanese_plum\nmangosteen\nmango\nsapodilla, sapodilla_plum, sapota\nsapote, mammee, marmalade_plum\ntamarind, tamarindo\navocado, alligator_pear, avocado_pear, aguacate\ndate\nelderberry\nguava\nmombin\nhog_plum, yellow_mombin\nhog_plum, wild_plum\njaboticaba\njujube, Chinese_date, Chinese_jujube\nlitchi, litchi_nut, litchee, lichi, leechee, lichee, lychee\nlonganberry, dragon's_eye\nmamey, mammee, mammee_apple\nmarang\nmedlar\nmedlar\nmulberry\nolive\nblack_olive, ripe_olive\ngreen_olive\npear\nbosc\nanjou\nbartlett, bartlett_pear\nseckel, seckel_pear\nplantain\nplumcot\npomegranate\nprickly_pear\nBarbados_gooseberry, blade_apple\nquandong, quandang, quantong, native_peach\nquandong_nut\nquince\nrambutan, rambotan\npulasan, pulassan\nrose_apple\nsorb, sorb_apple\nsour_gourd, monkey_bread\nedible_seed\npumpkin_seed\nbetel_nut, areca_nut\nbeechnut\nwalnut\nblack_walnut\nEnglish_walnut\nbrazil_nut, brazil\nbutternut\nsouari_nut\ncashew, cashew_nut\nchestnut\nchincapin, chinkapin, chinquapin\nhazelnut, filbert, cobnut, cob\ncoconut, cocoanut\ncoconut_milk, coconut_water\ngrugru_nut\nhickory_nut\ncola_extract\nmacadamia_nut\npecan\npine_nut, pignolia, pinon_nut\npistachio, pistachio_nut\nsunflower_seed\nanchovy_paste\nrollmops\nfeed, provender\ncattle_cake\ncreep_feed\nfodder\nfeed_grain\neatage, forage, pasture, pasturage, grass\nsilage, ensilage\noil_cake\noil_meal\nalfalfa\nbroad_bean, 
horse_bean\nhay\ntimothy\nstover\ngrain, food_grain, cereal\ngrist\ngroats\nmillet\nbarley, barleycorn\npearl_barley\nbuckwheat\nbulgur, bulghur, bulgur_wheat\nwheat, wheat_berry\ncracked_wheat\nstodge\nwheat_germ\noat\nrice\nbrown_rice\nwhite_rice, polished_rice\nwild_rice, Indian_rice\npaddy\nslop, slops, swill, pigswill, pigwash\nmash\nchicken_feed, scratch\ncud, rechewed_food\nbird_feed, bird_food, birdseed\npetfood, pet-food, pet_food\ndog_food\ncat_food\ncanary_seed\nsalad\ntossed_salad\ngreen_salad\nCaesar_salad\nsalmagundi\nsalad_nicoise\ncombination_salad\nchef's_salad\npotato_salad\npasta_salad\nmacaroni_salad\nfruit_salad\nWaldorf_salad\ncrab_Louis\nherring_salad\ntuna_fish_salad, tuna_salad\nchicken_salad\ncoleslaw, slaw\naspic\nmolded_salad\ntabbouleh, tabooli\ningredient, fixings\nflavorer, flavourer, flavoring, flavouring, seasoner, seasoning\nbouillon_cube\ncondiment\nherb\nfines_herbes\nspice\nspearmint_oil\nlemon_oil\nwintergreen_oil, oil_of_wintergreen\nsalt, table_salt, common_salt\ncelery_salt\nonion_salt\nseasoned_salt\nsour_salt\nfive_spice_powder\nallspice\ncinnamon\nstick_cinnamon\nclove\ncumin, cumin_seed\nfennel\nginger, gingerroot\nginger, powdered_ginger\nmace\nnutmeg\npepper, peppercorn\nblack_pepper\nwhite_pepper\nsassafras\nbasil, sweet_basil\nbay_leaf\nborage\nhyssop\ncaraway\nchervil\nchives\ncomfrey, healing_herb\ncoriander, Chinese_parsley, cilantro\ncoriander, coriander_seed\ncostmary\nfennel, common_fennel\nfennel, Florence_fennel, finocchio\nfennel_seed\nfenugreek, fenugreek_seed\ngarlic, ail\nclove, garlic_clove\ngarlic_chive\nlemon_balm\nlovage\nmarjoram, oregano\nmint\nmustard_seed\nmustard, table_mustard\nChinese_mustard\nnasturtium\nparsley\nsalad_burnet\nrosemary\nrue\nsage\nclary_sage\nsavory, savoury\nsummer_savory, summer_savoury\nwinter_savory, winter_savoury\nsweet_woodruff, waldmeister\nsweet_cicely\ntarragon, estragon\nthyme\nturmeric\ncaper\ncatsup, ketchup, cetchup, tomato_ketchup\ncardamom, cardamon, 
cardamum\ncayenne, cayenne_pepper, red_pepper\nchili_powder\nchili_sauce\nchutney, Indian_relish\nsteak_sauce\ntaco_sauce\nsalsa\nmint_sauce\ncranberry_sauce\ncurry_powder\ncurry\nlamb_curry\nduck_sauce, hoisin_sauce\nhorseradish\nmarinade\npaprika\nSpanish_paprika\npickle\ndill_pickle\nbread_and_butter_pickle\npickle_relish\npiccalilli\nsweet_pickle\napplesauce, apple_sauce\nsoy_sauce, soy\nTabasco, Tabasco_sauce\ntomato_paste\nangelica\nangelica\nalmond_extract\nanise, aniseed, anise_seed\nChinese_anise, star_anise, star_aniseed\njuniper_berries\nsaffron\nsesame_seed, benniseed\ncaraway_seed\npoppy_seed\ndill, dill_weed\ndill_seed\ncelery_seed\nlemon_extract\nmonosodium_glutamate, MSG\nvanilla_bean\nvinegar, acetum\ncider_vinegar\nwine_vinegar\nsauce\nanchovy_sauce\nhot_sauce\nhard_sauce\nhorseradish_sauce, sauce_Albert\nbolognese_pasta_sauce\ncarbonara\ntomato_sauce\ntartare_sauce, tartar_sauce\nwine_sauce\nmarchand_de_vin, mushroom_wine_sauce\nbread_sauce\nplum_sauce\npeach_sauce\napricot_sauce\npesto\nravigote, ravigotte\nremoulade_sauce\ndressing, salad_dressing\nsauce_Louis\nbleu_cheese_dressing, blue_cheese_dressing\nblue_cheese_dressing, Roquefort_dressing\nFrench_dressing, vinaigrette, sauce_vinaigrette\nLorenzo_dressing\nanchovy_dressing\nItalian_dressing\nhalf-and-half_dressing\nmayonnaise, mayo\ngreen_mayonnaise, sauce_verte\naioli, aioli_sauce, garlic_sauce\nRussian_dressing, Russian_mayonnaise\nsalad_cream\nThousand_Island_dressing\nbarbecue_sauce\nhollandaise\nbearnaise\nBercy, Bercy_butter\nbordelaise\nbourguignon, bourguignon_sauce, Burgundy_sauce\nbrown_sauce, sauce_Espagnole\nEspagnole, sauce_Espagnole\nChinese_brown_sauce, brown_sauce\nblanc\ncheese_sauce\nchocolate_sauce, chocolate_syrup\nhot-fudge_sauce, fudge_sauce\ncocktail_sauce, seafood_sauce\nColbert, Colbert_butter\nwhite_sauce, bechamel_sauce, bechamel\ncream_sauce\nMornay_sauce\ndemiglace, demi-glaze\ngravy, pan_gravy\ngravy\nspaghetti_sauce, 
pasta_sauce\nmarinara\nmole\nhunter's_sauce, sauce_chausseur\nmushroom_sauce\nmustard_sauce\nNantua, shrimp_sauce\nHungarian_sauce, paprika_sauce\npepper_sauce, Poivrade\nroux\nSmitane\nSoubise, white_onion_sauce\nLyonnaise_sauce, brown_onion_sauce\nveloute\nallemande, allemande_sauce\ncaper_sauce\npoulette\ncurry_sauce\nWorcester_sauce, Worcestershire, Worcestershire_sauce\ncoconut_milk, coconut_cream\negg, eggs\negg_white, white, albumen, ovalbumin\negg_yolk, yolk\nboiled_egg, coddled_egg\nhard-boiled_egg, hard-cooked_egg\nEaster_egg\nEaster_egg\nchocolate_egg\ncandy_egg\npoached_egg, dropped_egg\nscrambled_eggs\ndeviled_egg, stuffed_egg\nshirred_egg, baked_egg, egg_en_cocotte\nomelet, omelette\nfirm_omelet\nFrench_omelet\nfluffy_omelet\nwestern_omelet\nsouffle\nfried_egg\ndairy_product\nmilk\nmilk\nsour_milk\nsoya_milk, soybean_milk, soymilk\nformula\npasteurized_milk\ncows'_milk\nyak's_milk\ngoats'_milk\nacidophilus_milk\nraw_milk\nscalded_milk\nhomogenized_milk\ncertified_milk\npowdered_milk, dry_milk, dried_milk, milk_powder\nnonfat_dry_milk\nevaporated_milk\ncondensed_milk\nskim_milk, skimmed_milk\nsemi-skimmed_milk\nwhole_milk\nlow-fat_milk\nbuttermilk\ncream\nclotted_cream, Devonshire_cream\ndouble_creme, heavy_whipping_cream\nhalf-and-half\nheavy_cream\nlight_cream, coffee_cream, single_cream\nsour_cream, soured_cream\nwhipping_cream, light_whipping_cream\nbutter\nclarified_butter, drawn_butter\nghee\nbrown_butter, beurre_noisette\nMeuniere_butter, lemon_butter\nyogurt, yoghurt, yoghourt\nblueberry_yogurt\nraita\nwhey\ncurd\ncurd\nclabber\ncheese\nparing\ncream_cheese\ndouble_cream\nmascarpone\ntriple_cream, triple_creme\ncottage_cheese, pot_cheese, farm_cheese, farmer's_cheese\nprocess_cheese, processed_cheese\nbleu, blue_cheese\nStilton\nRoquefort\ngorgonzola\nDanish_blue\nBavarian_blue\nBrie\nbrick_cheese\nCamembert\ncheddar, cheddar_cheese, Armerican_cheddar, American_cheese\nrat_cheese, 
store_cheese\nCheshire_cheese\ndouble_Gloucester\nEdam\ngoat_cheese, chevre\nGouda, Gouda_cheese\ngrated_cheese\nhand_cheese\nLiederkranz\nLimburger\nmozzarella\nMuenster\nParmesan\nquark_cheese, quark\nricotta\nstring_cheese\nSwiss_cheese\nEmmenthal, Emmental, Emmenthaler, Emmentaler\nGruyere\nsapsago\nVelveeta\nnut_butter\npeanut_butter\nmarshmallow_fluff\nonion_butter\npimento_butter\nshrimp_butter\nlobster_butter\nyak_butter\nspread, paste\ncheese_spread\nanchovy_butter\nfishpaste\ngarlic_butter\nmiso\nwasabi\nsnail_butter\nhummus, humus, hommos, hoummos, humous\npate\nduck_pate\nfoie_gras, pate_de_foie_gras\ntapenade\ntahini\nsweetening, sweetener\naspartame\nhoney\nsaccharin\nsugar, refined_sugar\nsyrup, sirup\nsugar_syrup\nmolasses\nsorghum, sorghum_molasses\ntreacle, golden_syrup\ngrenadine\nmaple_syrup\ncorn_syrup\nmiraculous_food, manna, manna_from_heaven\nbatter\ndough\nbread_dough\npancake_batter\nfritter_batter\ncoq_au_vin\nchicken_provencale\nchicken_and_rice\nmoo_goo_gai_pan\narroz_con_pollo\nbacon_and_eggs\nbarbecued_spareribs, spareribs\nbeef_Bourguignonne, boeuf_Bourguignonne\nbeef_Wellington, filet_de_boeuf_en_croute\nbitok\nboiled_dinner, New_England_boiled_dinner\nBoston_baked_beans\nbubble_and_squeak\npasta\ncannelloni\ncarbonnade_flamande, Belgian_beef_stew\ncheese_souffle\nchicken_Marengo\nchicken_cordon_bleu\nMaryland_chicken\nchicken_paprika, chicken_paprikash\nchicken_Tetrazzini\nTetrazzini\nchicken_Kiev\nchili, chili_con_carne\nchili_dog\nchop_suey\nchow_mein\ncodfish_ball, codfish_cake\ncoquille\ncoquilles_Saint-Jacques\ncroquette\ncottage_pie\nrissole\ndolmas, stuffed_grape_leaves\negg_foo_yong, egg_fu_yung\negg_roll, spring_roll\neggs_Benedict\nenchilada\nfalafel, felafel\nfish_and_chips\nfondue, fondu\ncheese_fondue\nchocolate_fondue\nfondue, fondu\nbeef_fondue, boeuf_fondu_bourguignon\nFrench_toast\nfried_rice, Chinese_fried_rice\nfrittata\nfrog_legs\ngalantine\ngefilte_fish, 
fish_ball\nhaggis\nham_and_eggs\nhash\ncorned_beef_hash\njambalaya\nkabob, kebab, shish_kebab\nkedgeree\nsouvlaki, souvlakia\nlasagna, lasagne\nseafood_Newburg\nlobster_Newburg, lobster_a_la_Newburg\nshrimp_Newburg\nNewburg_sauce\nlobster_thermidor\nlutefisk, lutfisk\nmacaroni_and_cheese\nmacedoine\nmeatball\nporcupine_ball, porcupines\nSwedish_meatball\nmeat_loaf, meatloaf\nmoussaka\nosso_buco\nmarrow, bone_marrow\npheasant_under_glass\npigs_in_blankets\npilaf, pilaff, pilau, pilaw\nbulgur_pilaf\npizza, pizza_pie\nsausage_pizza\npepperoni_pizza\ncheese_pizza\nanchovy_pizza\nSicilian_pizza\npoi\npork_and_beans\nporridge\noatmeal, burgoo\nloblolly\npotpie\nrijsttaffel, rijstaffel, rijstafel\nrisotto, Italian_rice\nroulade\nfish_loaf\nsalmon_loaf\nSalisbury_steak\nsauerbraten\nsauerkraut\nscallopine, scallopini\nveal_scallopini\nscampi\nScotch_egg\nScotch_woodcock\nscrapple\nspaghetti_and_meatballs\nSpanish_rice\nsteak_tartare, tartar_steak, cannibal_mound\npepper_steak\nsteak_au_poivre, peppered_steak, pepper_steak\nbeef_Stroganoff\nstuffed_cabbage\nkishke, stuffed_derma\nstuffed_peppers\nstuffed_tomato, hot_stuffed_tomato\nstuffed_tomato, cold_stuffed_tomato\nsuccotash\nsukiyaki\nsashimi\nsushi\nSwiss_steak\ntamale\ntamale_pie\ntempura\nteriyaki\nterrine\nWelsh_rarebit, Welsh_rabbit, rarebit\nschnitzel, Wiener_schnitzel\ntaco\nchicken_taco\nburrito\nbeef_burrito\nquesadilla\ntostada\nbean_tostada\nrefried_beans, frijoles_refritos\nbeverage, drink, drinkable, potable\nwish-wash\nconcoction, mixture, intermixture\nmix, premix\nfilling\nlekvar\npotion\nelixir\nelixir_of_life\nphilter, philtre, love-potion, love-philter, love-philtre\nalcohol, alcoholic_drink, alcoholic_beverage, intoxicant, inebriant\nproof_spirit\nhome_brew, homebrew\nhooch, hootch\nkava, kavakava\naperitif\nbrew, brewage\nbeer\ndraft_beer, draught_beer\nsuds\nMunich_beer, Munchener\nbock, bock_beer\nlager, lager_beer\nlight_beer\nOktoberfest, Octoberfest\nPilsner, Pilsener\nshebeen\nWeissbier, 
white_beer, wheat_beer\nWeizenbock\nmalt\nwort\nmalt, malt_liquor\nale\nbitter\nBurton\npale_ale\nporter, porter's_beer\nstout\nGuinness\nkvass\nmead\nmetheglin\nhydromel\noenomel\nnear_beer\nginger_beer\nsake, saki, rice_beer\nwine, vino\nvintage\nred_wine\nwhite_wine\nblush_wine, pink_wine, rose, rose_wine\naltar_wine, sacramental_wine\nsparkling_wine\nchampagne, bubbly\ncold_duck\nBurgundy, Burgundy_wine\nBeaujolais\nMedoc\nCanary_wine\nChablis, white_Burgundy\nMontrachet\nChardonnay, Pinot_Chardonnay\nPinot_noir\nPinot_blanc\nBordeaux, Bordeaux_wine\nclaret, red_Bordeaux\nChianti\nCabernet, Cabernet_Sauvignon\nMerlot\nSauvignon_blanc\nCalifornia_wine\nCotes_de_Provence\ndessert_wine\nDubonnet\njug_wine\nmacon, maconnais\nMoselle\nMuscadet\nplonk\nretsina\nRhine_wine, Rhenish, hock\nRiesling\nliebfraumilch\nRhone_wine\nRioja\nsack\nSaint_Emilion\nSoave\nzinfandel\nSauterne, Sauternes\nstraw_wine\ntable_wine\nTokay\nvin_ordinaire\nvermouth\nsweet_vermouth, Italian_vermouth\ndry_vermouth, French_vermouth\nChenin_blanc\nVerdicchio\nVouvray\nYquem\ngeneric, generic_wine\nvarietal, varietal_wine\nfortified_wine\nMadeira\nmalmsey\nport, port_wine\nsherry\nMarsala\nmuscat, muscatel, muscadel, muscadelle\nliquor, spirits, booze, hard_drink, hard_liquor, John_Barleycorn, strong_drink\nneutral_spirits, ethyl_alcohol\naqua_vitae, ardent_spirits\neau_de_vie\nmoonshine, bootleg, corn_liquor\nbathtub_gin\naquavit, akvavit\narrack, arak\nbitters\nbrandy\napplejack\nCalvados\nArmagnac\nCognac\ngrappa\nkirsch\nslivovitz\ngin\nsloe_gin\ngeneva, Holland_gin, Hollands\ngrog\nouzo\nrum\ndemerara, demerara_rum\nJamaica_rum\nschnapps, schnaps\npulque\nmescal\ntequila\nvodka\nwhiskey, whisky\nblended_whiskey, blended_whisky\nbourbon\ncorn_whiskey, corn_whisky, corn\nfirewater\nIrish, Irish_whiskey, Irish_whisky\npoteen\nrye, rye_whiskey, rye_whisky\nScotch, Scotch_whiskey, Scotch_whisky, malt_whiskey, malt_whisky, Scotch_malt_whiskey, Scotch_malt_whisky\nsour_mash, 
sour_mash_whiskey\nliqueur, cordial\nabsinth, absinthe\namaretto\nanisette, anisette_de_Bordeaux\nbenedictine\nChartreuse\ncoffee_liqueur\ncreme_de_cacao\ncreme_de_menthe\ncreme_de_fraise\nDrambuie\nGalliano\norange_liqueur\ncuracao, curacoa\ntriple_sec\nGrand_Marnier\nkummel\nmaraschino, maraschino_liqueur\npastis\nPernod\npousse-cafe\nKahlua\nratafia, ratafee\nsambuca\nmixed_drink\ncocktail\nDom_Pedro\nhighball\nmixer\nbishop\nBloody_Mary\nVirgin_Mary, bloody_shame\nbullshot\ncobbler\ncollins, Tom_Collins\ncooler\nrefresher\nsmoothie\ndaiquiri, rum_cocktail\nstrawberry_daiquiri\nNADA_daiquiri\nspritzer\nflip\ngimlet\ngin_and_tonic\ngrasshopper\nHarvey_Wallbanger\njulep, mint_julep\nmanhattan\nRob_Roy\nmargarita\nmartini\ngin_and_it\nvodka_martini\nold_fashioned\npink_lady\nSazerac\nscrewdriver\nsidecar\nScotch_and_soda\nsling\nbrandy_sling\ngin_sling\nrum_sling\nsour\nwhiskey_sour, whisky_sour\nstinger\nswizzle\nhot_toddy, toddy\nzombie, zombi\nfizz\nIrish_coffee\ncafe_au_lait\ncafe_noir, demitasse\ndecaffeinated_coffee, decaf\ndrip_coffee\nespresso\ncaffe_latte, latte\ncappuccino, cappuccino_coffee, coffee_cappuccino\niced_coffee, ice_coffee\ninstant_coffee\nmocha, mocha_coffee\nmocha\ncassareep\nTurkish_coffee\nchocolate_milk\ncider, cyder\nhard_cider\nscrumpy\nsweet_cider\nmulled_cider\nperry\nrotgut\nslug\ncocoa, chocolate, hot_chocolate, drinking_chocolate\ncriollo\njuice\nfruit_juice, fruit_crush\nnectar\napple_juice\ncranberry_juice\ngrape_juice\nmust\ngrapefruit_juice\norange_juice\nfrozen_orange_juice, orange-juice_concentrate\npineapple_juice\nlemon_juice\nlime_juice\npapaya_juice\ntomato_juice\ncarrot_juice\nV-8_juice\nkoumiss, kumis\nfruit_drink, ade\nlemonade\nlimeade\norangeade\nmalted_milk\nmate\nmulled_wine\nnegus\nsoft_drink\npop, soda, soda_pop, soda_water, tonic\nbirch_beer\nbitter_lemon\ncola, dope\ncream_soda\negg_cream\nginger_ale, ginger_pop\norange_soda\nphosphate\nCoca_Cola, Coke\nPepsi, Pepsi_Cola\nroot_beer\nsarsaparilla\ntonic, 
tonic_water, quinine_water\ncoffee_bean, coffee_berry, coffee\ncoffee, java\ncafe_royale, coffee_royal\nfruit_punch\nmilk_punch\nmimosa, buck's_fizz\npina_colada\npunch\ncup\nchampagne_cup\nclaret_cup\nwassail\nplanter's_punch\nWhite_Russian\nfish_house_punch\nMay_wine\neggnog\ncassiri\nspruce_beer\nrickey\ngin_rickey\ntea, tea_leaf\ntea_bag\ntea\ntea-like_drink\ncambric_tea\ncuppa, cupper\nherb_tea, herbal_tea, herbal\ntisane\ncamomile_tea\nice_tea, iced_tea\nsun_tea\nblack_tea\ncongou, congo, congou_tea, English_breakfast_tea\nDarjeeling\norange_pekoe, pekoe\nsouchong, soochong\ngreen_tea\nhyson\noolong\nwater\nbottled_water\nbranch_water\nspring_water\nsugar_water\ndrinking_water\nice_water\nsoda_water, carbonated_water, club_soda, seltzer, sparkling_water\nmineral_water\nseltzer\nVichy_water\nperishable, spoilable\ncouscous\nramekin, ramequin\nmultivitamin, multivitamin_pill\nvitamin_pill\nsoul_food\nmold, mould\npeople\ncollection, aggregation, accumulation, assemblage\nbook, rule_book\nlibrary\nbaseball_club, ball_club, club, nine\ncrowd\nclass, form, grade, course\ncore, nucleus, core_group\nconcert_band, military_band\ndance\nwedding, wedding_party\nchain, concatenation\npower_breakfast\naerie, aery, eyrie, eyry\nagora\namusement_park, funfair, pleasure_ground\naphelion\napron\ninterplanetary_space\ninterstellar_space\nintergalactic_space\nbush\nsemidesert\nbeam-ends\nbridgehead\nbus_stop\ncampsite, campground, camping_site, camping_ground, bivouac, encampment, camping_area\ndetention_basin\ncemetery, graveyard, burial_site, burial_ground, burying_ground, memorial_park, necropolis\ntrichion, crinion\ncity, metropolis, urban_center\nbusiness_district, downtown\noutskirts\nborough\ncow_pasture\ncrest\neparchy, exarchate\nsuburb, suburbia, suburban_area\nstockbroker_belt\ncrawlspace, crawl_space\nsheikdom, sheikhdom\nresidence, abode\ndomicile, legal_residence\ndude_ranch\nfarmland, farming_area\nmidfield\nfirebreak, fireguard\nflea_market\nbattlefront, front, 
front_line\ngarbage_heap, junk_heap, rubbish_heap, scrapheap, trash_heap, junk_pile, trash_pile, refuse_heap\nbenthos, benthic_division, benthonic_zone\ngoldfield\ngrainfield, grain_field\nhalf-mast, half-staff\nhemline\nheronry\nhipline\nhipline\nhole-in-the-wall\njunkyard\nisoclinic_line, isoclinal\nlittoral, litoral, littoral_zone, sands\nmagnetic_pole\ngrassland\nmecca\nobserver's_meridian\nprime_meridian\nnombril\nno-parking_zone\noutdoors, out-of-doors, open_air, open\nfairground\npasture, pastureland, grazing_land, lea, ley\nperihelion\nperiselene, perilune\nlocus_of_infection\nkasbah, casbah\nwaterfront\nresort, resort_hotel, holiday_resort\nresort_area, playground, vacation_spot\nrough\nashram\nharborage, harbourage\nscrubland\nweald\nwold\nschoolyard\nshowplace\nbedside\nsideline, out_of_bounds\nski_resort\nsoil_horizon\ngeological_horizon\ncoal_seam\ncoalface\nfield\noilfield\nTemperate_Zone\nterreplein\nthree-mile_limit\ndesktop\ntop\nkampong, campong\nsubtropics, semitropics\nbarrio\nveld, veldt\nvertex, peak, apex, acme\nwaterline, water_line, water_level\nhigh-water_mark\nlow-water_mark\ncontinental_divide\nzodiac\nAegean_island\nsultanate\nSwiss_canton\nabyssal_zone\naerie, aery, eyrie, eyry\nair_bubble\nalluvial_flat, alluvial_plain\nalp\nAlpine_glacier, Alpine_type_of_glacier\nanthill, formicary\naquifer\narchipelago\narete\narroyo\nascent, acclivity, rise, raise, climb, upgrade\nasterism\nasthenosphere\natoll\nbank\nbank\nbar\nbarbecue_pit\nbarrier_reef\nbaryon, heavy_particle\nbasin\nbeach\nhoneycomb\nbelay\nben\nberm\nbladder_stone, cystolith\nbluff\nborrow_pit\nbrae\nbubble\nburrow, tunnel\nbutte\ncaldera\ncanyon, canon\ncanyonside\ncave\ncavern\nchasm\ncirque, corrie, cwm\ncliff, drop, drop-off\ncloud\ncoast\ncoastland\ncol, gap\ncollector\ncomet\ncontinental_glacier\ncoral_reef\ncove\ncrag\ncrater\ncultivated_land, farmland, plowland, ploughland, tilled_land, tillage, tilth\ndale\ndefile, gorge\ndelta\ndescent, declivity, fall, decline, 
declination, declension, downslope\ndiapir\ndivot\ndivot\ndown\ndownhill\ndraw\ndrey\ndrumlin\ndune, sand_dune\nescarpment, scarp\nesker\nfireball\nflare_star\nfloor\nfomite, vehicle\nfoothill\nfootwall\nforeland\nforeshore\ngauge_boson\ngeological_formation, formation\ngeyser\nglacier\nglen\ngopher_hole\ngorge\ngrotto, grot\ngrowler\ngulch, flume\ngully\nhail\nhighland, upland\nhill\nhillside\nhole, hollow\nhollow, holler\nhot_spring, thermal_spring\niceberg, berg\nicecap, ice_cap\nice_field\nice_floe, floe\nice_mass\ninclined_fault\nion\nisthmus\nkidney_stone, urinary_calculus, nephrolith, renal_calculus\nknoll, mound, hillock, hummock, hammock\nkopje, koppie\nKuiper_belt, Edgeworth-Kuiper_belt\nlake_bed, lake_bottom\nlakefront\nlakeside, lakeshore\nlandfall\nlandfill\nlather\nleak\nledge, shelf\nlepton\nlithosphere, geosphere\nlowland\nlunar_crater\nmaar\nmassif\nmeander\nmesa, table\nmeteorite\nmicrofossil\nmidstream\nmolehill\nmonocline\nmountain, mount\nmountainside, versant\nmouth\nmull\nnatural_depression, depression\nnatural_elevation, elevation\nnullah\nocean\nocean_floor, sea_floor, ocean_bottom, seabed, sea_bottom, Davy_Jones's_locker, Davy_Jones\noceanfront\noutcrop, outcropping, rock_outcrop\noxbow\npallasite\nperforation\nphotosphere\npiedmont\nPiedmont_glacier, Piedmont_type_of_glacier\npinetum\nplage\nplain, field, champaign\npoint\npolar_glacier\npothole, chuckhole\nprecipice\npromontory, headland, head, foreland\nptyalith\npulsar\nquicksand\nrabbit_burrow, rabbit_hole\nradiator\nrainbow\nrange, mountain_range, range_of_mountains, chain, mountain_chain, chain_of_mountains\nrangeland\nravine\nreef\nridge\nridge, ridgeline\nrift_valley\nriparian_forest\nripple_mark\nriverbank, riverside\nriverbed, river_bottom\nrock, stone\nroof\nsaltpan\nsandbank\nsandbar, sand_bar\nsandpit\nsanitary_landfill\nsawpit\nscablands\nseashore, coast, seacoast, sea-coast\nseaside, seaboard\nseif_dune\nshell\nshiner\nshoal\nshore\nshoreline\nsinkhole, sink, 
swallow_hole\nski_slope\nsky\nslope, incline, side\nsnowcap\nsnowdrift\nsnowfield\nsoapsuds, suds, lather\nspit, tongue\nspoor\nspume\nstar\nsteep\nsteppe\nstrand\nstreambed, creek_bed\nsun, Sun\nsupernova\nswale\nswamp, swampland\nswell\ntableland, plateau\ntalus, scree\ntangle\ntar_pit\nterrace, bench\ntidal_basin\ntideland\ntor\ntor\nTrapezium\ntroposphere\ntundra\ntwinkler\nuphill\nurolith\nvalley, vale\nvehicle-borne_transmission\nvein, mineral_vein\nvolcanic_crater, crater\nvolcano\nwadi\nwall\nwarren, rabbit_warren\nwasp's_nest, wasps'_nest, hornet's_nest, hornets'_nest\nwatercourse\nwaterside\nwater_table, water_level, groundwater_level\nwhinstone, whin\nwormcast\nxenolith\nCirce\ngryphon, griffin, griffon\nspiritual_leader\nmessiah, christ\nRhea_Silvia, Rea_Silvia\nnumber_one\nadventurer, venturer\nanomaly, unusual_person\nappointee, appointment\nargonaut\nAshkenazi\nbenefactor, helper\ncolor-blind_person\ncommoner, common_man, common_person\nconservator\ncontrarian\ncontadino\ncontestant\ncosigner, cosignatory\ndiscussant\nenologist, oenologist, fermentologist\nentertainer\neulogist, panegyrist\nex-gambler\nexperimenter\nexperimenter\nexponent\nex-president\nface\nfemale, female_person\nfinisher\ninhabitant, habitant, dweller, denizen, indweller\nnative, indigen, indigene, aborigine, aboriginal\nnative\njuvenile, juvenile_person\nlover\nmale, male_person\nmediator, go-between, intermediator, intermediary, intercessor\nmediatrix\nnational, subject\npeer, equal, match, compeer\nprize_winner, lottery_winner\nrecipient, receiver\nreligionist\nsensualist\ntraveler, traveller\nunwelcome_person, persona_non_grata\nunskilled_person\nworker\nwrongdoer, offender\nBlack_African\nAfrikaner, Afrikander, Boer\nAryan\nBlack, Black_person, blackamoor, Negro, Negroid\nBlack_woman\nmulatto\nWhite, White_person, Caucasian\nCircassian\nSemite\nChaldean, Chaldaean, Chaldee\nElamite\nwhite_man\nWASP, white_Anglo-Saxon_Protestant\ngook, slant-eye\nMongol, Mongolian\nTatar, 
Tartar, Mongol_Tatar\nNahuatl\nAztec\nOlmec\nBiloxi\nBlackfoot\nBrule\nCaddo\nCheyenne\nChickasaw\nCocopa, Cocopah\nComanche\nCreek\nDelaware\nDiegueno\nEsselen\nEyeish\nHavasupai\nHunkpapa\nIowa, Ioway\nKalapooia, Kalapuya, Calapooya, Calapuya\nKamia\nKekchi\nKichai\nKickapoo\nKiliwa, Kiliwi\nMalecite\nMaricopa\nMohican, Mahican\nMuskhogean, Muskogean\nNavaho, Navajo\nNootka\nOglala, Ogalala\nOsage\nOneida\nPaiute, Piute\nPassamaquody\nPenobscot\nPenutian\nPotawatomi\nPowhatan\nkachina\nSalish\nShahaptian, Sahaptin, Sahaptino\nShasta\nShawnee\nSihasapa\nTeton, Lakota, Teton_Sioux, Teton_Dakota\nTaracahitian\nTarahumara\nTuscarora\nTutelo\nYana\nYavapai\nYokuts\nYuma\nGadaba\nKolam\nKui\nToda\nTulu\nGujarati, Gujerati\nKashmiri\nPunjabi, Panjabi\nSlav\nAnabaptist\nAdventist, Second_Adventist\ngentile, non-Jew, goy\ngentile\nCatholic\nOld_Catholic\nUniat, Uniate, Uniate_Christian\nCopt\nJewess\nJihadist\nBuddhist\nZen_Buddhist\nMahayanist\nswami\nHare_Krishna\nShintoist\nEurafrican\nEurasian\nGael\nFrank\nAfghan, Afghanistani\nAlbanian\nAlgerian\nAltaic\nAndorran\nAngolan\nAnguillan\nAustrian\nBahamian\nBahraini, Bahreini\nBasotho\nHerero\nLuba, Chiluba\nBarbadian\nBolivian\nBornean\nCarioca\nTupi\nBruneian\nBulgarian\nByelorussian, Belorussian, White_Russian\nCameroonian\nCanadian\nFrench_Canadian\nCentral_American\nChilean\nCongolese\nCypriot, Cypriote, Cyprian\nDane\nDjiboutian\nBritisher, Briton, Brit\nEnglish_person\nEnglishwoman\nAnglo-Saxon\nAngle\nWest_Saxon\nLombard, Langobard\nlimey, John_Bull\nCantabrigian\nCornishman\nCornishwoman\nLancastrian\nLancastrian\nGeordie\nOxonian\nEthiopian\nAmhara\nEritrean\nFinn\nKomi\nLivonian\nLithuanian\nSelkup, Ostyak-Samoyed\nParisian\nParisienne\nCreole\nCreole\nGabonese\nGreek, Hellene\nDorian\nAthenian\nLaconian\nGuyanese\nHaitian\nMalay, Malayan\nMoro\nNetherlander, Dutchman, Hollander\nIcelander\nIraqi, Iraki\nIrishman\nIrishwoman\nDubliner\nItalian\nRoman\nSabine\nJapanese, 
Nipponese\nJordanian\nKorean\nKenyan\nLao, Laotian\nLapp, Lapplander, Sami, Saami, Same, Saame\nLatin_American, Latino\nLebanese\nLevantine\nLiberian\nLuxemburger, Luxembourger\nMacedonian\nSabahan\nMexican\nChicano\nMexican-American, Mexicano\nNamibian\nNauruan\nGurkha\nNew_Zealander, Kiwi\nNicaraguan\nNigerian\nHausa, Haussa\nNorth_American\nNova_Scotian, bluenose\nOmani\nPakistani\nBrahui\nSouth_American_Indian\nCarib, Carib_Indian\nFilipino\nPolynesian\nQatari, Katari\nRomanian, Rumanian\nMuscovite\nGeorgian\nSarawakian\nScandinavian, Norse, Northman\nSenegalese\nSlovene\nSouth_African\nSouth_American\nSudanese\nSyrian\nTahitian\nTanzanian\nTibetan\nTogolese\nTuareg\nTurki\nChuvash\nTurkoman, Turkmen, Turcoman\nUzbek, Uzbeg, Uzbak, Usbek, Usbeg\nUgandan\nUkranian\nYakut\nTungus, Evenk\nIgbo\nAmerican\nAnglo-American\nAlaska_Native, Alaskan_Native, Native_Alaskan\nArkansan, Arkansawyer\nCarolinian\nColoradan\nConnecticuter\nDelawarean, Delawarian\nFloridian\nGerman_American\nIllinoisan\nMainer, Down_Easter\nMarylander\nMinnesotan, Gopher\nNebraskan, Cornhusker\nNew_Hampshirite, Granite_Stater\nNew_Jerseyan, New_Jerseyite, Garden_Stater\nNew_Yorker\nNorth_Carolinian, Tarheel\nOregonian, Beaver\nPennsylvanian, Keystone_Stater\nTexan\nUtahan\nUruguayan\nVietnamese, Annamese\nGambian\nEast_German\nBerliner\nPrussian\nGhanian\nGuinean\nPapuan\nWalloon\nYemeni\nYugoslav, Jugoslav, Yugoslavian, Jugoslavian\nSerbian, Serb\nXhosa\nZairese, Zairean\nZimbabwean\nZulu\nGemini, Twin\nSagittarius, Archer\nPisces, Fish\nabbe\nabbess, mother_superior, prioress\nabnegator\nabridger, abbreviator\nabstractor, abstracter\nabsconder\nabsolver\nabecedarian\naberrant\nabettor, abetter\nabhorrer\nabomination\nabseiler, rappeller\nabstainer, ascetic\nacademic_administrator\nacademician\naccessory_before_the_fact\ncompanion\naccompanist, accompanyist\naccomplice, confederate\naccount_executive, account_representative, registered_representative, customer's_broker, 
customer's_man\naccused\naccuser\nacid_head\nacquaintance, friend\nacquirer\naerialist\naction_officer\nactive\nactive_citizen\nactor, histrion, player, thespian, role_player\nactor, doer, worker\naddict, nut, freak, junkie, junky\nadducer\nadjuster, adjustor, claims_adjuster, claims_adjustor, claim_agent\nadjutant, aide, aide-de-camp\nadjutant_general\nadmirer, adorer\nadoptee\nadulterer, fornicator\nadulteress, fornicatress, hussy, jade, loose_woman, slut, strumpet, trollop\nadvertiser, advertizer, adman\nadvisee\nadvocate, advocator, proponent, exponent\naeronautical_engineer\naffiliate\naffluent\naficionado\nbuck_sergeant\nagent-in-place\naggravator, annoyance\nagitator, fomenter\nagnostic\nagnostic, doubter\nagonist\nagony_aunt\nagriculturist, agriculturalist, cultivator, grower, raiser\nair_attache\nair_force_officer, commander\nairhead\nair_traveler, air_traveller\nalarmist\nalbino\nalcoholic, alky, dipsomaniac, boozer, lush, soaker, souse\nalderman\nalexic\nalienee, grantee\nalienor\naliterate, aliterate_person\nalgebraist\nallegorizer, allegoriser\nalliterator\nalmoner, medical_social_worker\nalpinist\naltar_boy\nalto\nambassador, embassador\nambassador\nambusher\namicus_curiae, friend_of_the_court\namoralist\namputee\nanalogist\nanalphabet, analphabetic\nanalyst\nindustry_analyst\nmarket_strategist\nanarchist, nihilist, syndicalist\nanathema, bete_noire\nancestor, ascendant, ascendent, antecedent, root\nanchor, anchorman, anchorperson\nancient\nanecdotist, raconteur\nangler, troller\nanimator\nanimist\nannotator\nannouncer\nannouncer\nanti\nanti-American\nanti-Semite, Jew-baiter\nAnzac\nape-man\naphakic\nappellant, plaintiff_in_error\nappointee\napprehender\nApril_fool\naspirant, aspirer, hopeful, wannabe, wannabee\nappreciator\nappropriator\nArabist\narchaist\narchbishop\narcher, bowman\narchitect, designer\narchivist\narchpriest, hierarch, high_priest, prelate, primate\nAristotelian, Aristotelean, Peripatetic\narmiger\narmy_attache\narmy_engineer, 
military_engineer\narmy_officer\narranger, adapter, transcriber\narrival, arriver, comer\narthritic\narticulator\nartilleryman, cannoneer, gunner, machine_gunner\nartist's_model, sitter\nassayer\nassemblyman\nassemblywoman\nassenter\nasserter, declarer, affirmer, asseverator, avower\nassignee\nassistant, helper, help, supporter\nassistant_professor\nassociate\nassociate\nassociate_professor\nastronaut, spaceman, cosmonaut\ncosmographer, cosmographist\natheist\nathlete, jock\nattendant, attender, tender\nattorney_general\nauditor\naugur, auspex\naunt, auntie, aunty\nau_pair_girl\nauthoritarian, dictator\nauthority\nauthorizer, authoriser\nautomobile_mechanic, auto-mechanic, car-mechanic, mechanic, grease_monkey\naviator, aeronaut, airman, flier, flyer\naviatrix, airwoman, aviatress\nayah\nbabu, baboo\nbaby, babe, sister\nbaby\nbaby_boomer, boomer\nbaby_farmer\nback\nbackbencher\nbackpacker, packer\nbackroom_boy, brain_truster\nbackscratcher\nbad_person\nbaggage\nbag_lady\nbailee\nbailiff\nbailor\nbairn\nbaker, bread_maker\nbalancer\nbalker, baulker, noncompliant\nball-buster, ball-breaker\nball_carrier, runner\nballet_dancer\nballet_master\nballet_mistress\nballetomane\nball_hawk\nballoonist\nballplayer, baseball_player\nbullfighter, toreador\nbanderillero\nmatador\npicador\nbandsman\nbanker\nbank_robber\nbankrupt, insolvent\nbantamweight\nbarmaid\nbaron, big_businessman, business_leader, king, magnate, mogul, power, top_executive, tycoon\nbaron\nbaron\nbartender, barman, barkeep, barkeeper, mixologist\nbaseball_coach, baseball_manager\nbase_runner, runner\nbasketball_player, basketeer, cager\nbasketweaver, basketmaker\nBasket_Maker\nbass, basso\nbastard, by-blow, love_child, illegitimate_child, illegitimate, whoreson\nbat_boy\nbather\nbatman\nbaton_twirler, twirler\nBavarian\nbeadsman, bedesman\nbeard\nbeatnik, beat\nbeauty_consultant\nBedouin, Beduin\nbedwetter, bed_wetter, wetter\nbeekeeper, apiarist, apiculturist\nbeer_drinker, 
ale_drinker\nbeggarman\nbeggarwoman\nbeldam, beldame\ntheist\nbeliever, truster\nbell_founder\nbenedick, benedict\nberserker, berserk\nbesieger\nbest, topper\nbetrothed\nBig_Brother\nbigot\nbig_shot, big_gun, big_wheel, big_cheese, big_deal, big_enchilada, big_fish, head_honcho\nbig_sister\nbilliard_player\nbiochemist\nbiographer\nbird_fancier\nbirth\nbirth-control_campaigner, birth-control_reformer\nbisexual, bisexual_person\nblack_belt\nblackmailer, extortioner, extortionist\nBlack_Muslim\nblacksmith\nblade\nbleacher\nblind_date\nbluecoat\nbluestocking, bas_bleu\nboatbuilder\nboatman, boater, waterman\nboatswain, bos'n, bo's'n, bosun, bo'sun\nbobby\nbodyguard, escort\nboffin\nBolshevik, Marxist, red, bolshie, bolshy\nBolshevik, Bolshevist\nbombshell\nbondman, bondsman\nbondwoman, bondswoman, bondmaid\nbondwoman, bondswoman, bondmaid\nbond_servant\nbook_agent\nbookbinder\nbookkeeper\nbookmaker\nbookworm\nbooster, shoplifter, lifter\nbootblack, shoeblack\nbootlegger, moonshiner\nbootmaker, boot_maker\nborderer\nborder_patrolman\nbotanist, phytologist, plant_scientist\nbottom_feeder\nboulevardier\nbounty_hunter\nbounty_hunter\nBourbon\nbowler\nslugger, slogger\ncub, lad, laddie, sonny, sonny_boy\nBoy_Scout\nboy_scout\nboy_wonder\nbragger, braggart, boaster, blowhard, line-shooter, vaunter\nbrahman, brahmin\nbrawler\nbreadwinner\nbreaststroker\nbreeder, stock_breeder\nbrick\nbride\nbridesmaid, maid_of_honor\nbridge_agent\nbroadcast_journalist\nBrother\nbrother-in-law\nbrowser\nBrummie, Brummy\nbuddy, brother, chum, crony, pal, sidekick\nbull\nbully\nbunny, bunny_girl\nburglar\nbursar\nbusboy, waiter's_assistant\nbusiness_editor\nbusiness_traveler\nbuster\nbusybody, nosy-parker, nosey-parker, quidnunc\nbuttinsky\ncabinetmaker, furniture_maker\ncaddie, golf_caddie\ncadet, plebe\ncaller, caller-out\ncall_girl\ncalligrapher, calligraphist\ncampaigner, candidate, nominee\ncamper\ncamp_follower\ncandidate, prospect\ncanonist\ncapitalist\ncaptain, headwaiter, 
maitre_d'hotel, maitre_d'\ncaptain, senior_pilot\ncaptain\ncaptain, chieftain\ncaptive\ncaptive\ncardinal\ncardiologist, heart_specialist, heart_surgeon\ncard_player\ncardsharp, card_sharp, cardsharper, card_sharper, sharper, sharpie, sharpy, card_shark\ncareerist\ncareer_man\ncaregiver\ncaretaker\ncaretaker\ncaricaturist\ncarillonneur\ncaroler, caroller\ncarpenter\ncarper, niggler\nCartesian\ncashier\ncasualty, injured_party\ncasualty\ncasuist, sophist\ncatechist\ncatechumen, neophyte\ncaterer\nCatholicos\ncat_fancier\nCavalier, Royalist\ncavalryman, trooper\ncaveman, cave_man, cave_dweller, troglodyte\ncelebrant\ncelebrant, celebrator, celebrater\ncelebrity, famous_person\ncellist, violoncellist\ncensor\ncensor\ncentenarian\ncentrist, middle_of_the_roader, moderate, moderationist\ncenturion\ncertified_public_accountant, CPA\nchachka, tsatske, tshatshke, tchotchke, tchotchkeleh\nchambermaid, fille_de_chambre\nchameleon\nchampion, champ, title-holder\nchandler\nprison_chaplain\ncharcoal_burner\ncharge_d'affaires\ncharioteer\ncharmer, beguiler\nchartered_accountant\nchartist, technical_analyst\ncharwoman, char, cleaning_woman, cleaning_lady, woman\nmale_chauvinist, sexist\ncheapskate, tightwad\nChechen\nchecker\ncheerer\ncheerleader\ncheerleader\nCheops, Khufu\nchess_master\nchief_executive_officer, CEO, chief_operating_officer\nchief_of_staff\nchief_petty_officer\nChief_Secretary\nchild, kid, youngster, minor, shaver, nipper, small_fry, tiddler, tike, tyke, fry, nestling\nchild, kid\nchild, baby\nchild_prodigy, infant_prodigy, wonder_child\nchimneysweeper, chimneysweep, sweep\nchiropractor\nchit\nchoker\nchoragus\nchoreographer\nchorus_girl, showgirl, chorine\nchosen\ncicerone\ncigar_smoker\ncipher, cypher, nobody, nonentity\ncircus_acrobat\ncitizen\ncity_editor\ncity_father\ncity_man\ncity_slicker, city_boy\ncivic_leader, civil_leader\ncivil_rights_leader, civil_rights_worker, civil_rights_activist\ncleaner\nclergyman, reverend, man_of_the_cloth\ncleric, 
churchman, divine, ecclesiastic\nclerk\nclever_Dick, clever_clogs\nclimatologist\nclimber\nclinician\ncloser, finisher\ncloset_queen\nclown, buffoon, goof, goofball, merry_andrew\nclown, buffoon\ncoach, private_instructor, tutor\ncoach, manager, handler\npitching_coach\ncoachman\ncoal_miner, collier, pitman\ncoastguardsman\ncobber\ncobbler, shoemaker\ncodger, old_codger\nco-beneficiary\ncog\ncognitive_neuroscientist\ncoiffeur\ncoiner\ncollaborator, cooperator, partner, pardner\ncolleen\ncollege_student, university_student\ncollegian, college_man, college_boy\ncolonial\ncolonialist\ncolonizer, coloniser\ncoloratura, coloratura_soprano\ncolor_guard\ncolossus, behemoth, giant, heavyweight, titan\ncomedian\ncomedienne\ncomer\ncommander\ncommander_in_chief, generalissimo\ncommanding_officer, commandant, commander\ncommissar, political_commissar\ncommissioned_officer\ncommissioned_military_officer\ncommissioner\ncommissioner\ncommittee_member\ncommitteewoman\ncommodore\ncommunicant\ncommunist, commie\nCommunist\ncommuter\ncompere\ncomplexifier\ncompulsive\ncomputational_linguist\ncomputer_scientist\ncomputer_user\nComrade\nconcert-goer, music_lover\nconciliator, make-peace, pacifier, peacemaker, reconciler\nconductor\nconfectioner, candymaker\nConfederate\nconfessor\nconfidant, intimate\nConfucian, Confucianist\nrep\nconqueror, vanquisher\nConservative\nNonconformist, chapelgoer\nAnglican\nconsignee\nconsigner, consignor\nconstable\nconstructivist\ncontractor\ncontralto\ncontributor\ncontrol_freak\nconvalescent\nconvener\nconvict, con, inmate, yard_bird, yardbird\ncopilot, co-pilot\ncopycat, imitator, emulator, ape, aper\ncoreligionist\ncornerback\ncorporatist\ncorrespondent, letter_writer\ncosmetician\ncosmopolitan, cosmopolite\nCossack\ncost_accountant\nco-star\ncostumier, costumer, costume_designer\ncotter, cottier\ncotter, cottar\ncounselor, counsellor\ncounterterrorist\ncounterspy, mole\ncountess\ncompromiser\ncountrywoman\ncounty_agent, agricultural_agent, 
extension_agent\ncourtier\ncousin, first_cousin, cousin-german, full_cousin\ncover_girl, pin-up, lovely\ncow\ncraftsman, artisan, journeyman, artificer\ncraftsman, crafter\ncrapshooter\ncrazy, loony, looney, nutcase, weirdo\ncreature, wight\ncreditor\ncreep, weirdo, weirdie, weirdy, spook\ncriminologist\ncritic\nCroesus\ncross-examiner, cross-questioner\ncrossover_voter, crossover\ncroupier\ncrown_prince\ncrown_princess\ncryptanalyst, cryptographer, cryptologist\nCub_Scout\ncuckold\ncultist\ncurandera\ncurate, minister_of_religion, minister, parson, pastor, rector\ncurator, conservator\ncustomer_agent\ncutter, carver\ncyberpunk\ncyborg, bionic_man, bionic_woman\ncymbalist\nCynic\ncytogeneticist\ncytologist\nczar\nczar, tsar, tzar\ndad, dada, daddy, pa, papa, pappa, pop\ndairyman\nDalai_Lama, Grand_Lama\ndallier, dillydallier, dilly-dallier, mope, lounger\ndancer, professional_dancer, terpsichorean\ndancer, social_dancer\nclog_dancer\ndancing-master, dance_master\ndark_horse\ndarling, favorite, favourite, pet, dearie, deary, ducky\ndate, escort\ndaughter, girl\ndawdler, drone, laggard, lagger, trailer, poke\nday_boarder\nday_laborer, day_labourer\ndeacon, Protestant_deacon\ndeaconess\ndeadeye\ndeipnosophist\ndropout\ndeadhead\ndeaf_person\ndebtor, debitor\ndeckhand, roustabout\ndefamer, maligner, slanderer, vilifier, libeler, backbiter, traducer\ndefense_contractor\ndeist, freethinker\ndelegate\ndeliveryman, delivery_boy, deliverer\ndemagogue, demagog, rabble-rouser\ndemigod, superman, Ubermensch\ndemographer, demographist, population_scientist\ndemonstrator, protester\nden_mother\ndepartment_head\ndepositor\ndeputy\ndermatologist, skin_doctor\ndescender\ndesignated_hitter\ndesigner, intriguer\ndesk_clerk, hotel_desk_clerk, hotel_clerk\ndesk_officer\ndesk_sergeant, deskman, station_keeper\ndetainee, political_detainee\ndetective, investigator, tec, police_detective\ndetective\ndetractor, disparager, depreciator, 
knocker\ndeveloper\ndeviationist\ndevisee\ndevisor\ndevourer\ndialectician\ndiarist, diary_keeper, journalist\ndietician, dietitian, nutritionist\ndiocesan\ndirector, theater_director, theatre_director\ndirector\ndirty_old_man\ndisbeliever, nonbeliever, unbeliever\ndisk_jockey, disc_jockey, dj\ndispatcher\ndistortionist\ndistributor, distributer\ndistrict_attorney, DA\ndistrict_manager\ndiver, plunger\ndivorcee, grass_widow\nex-wife, ex\ndivorce_lawyer\ndocent\ndoctor, doc, physician, MD, Dr., medico\ndodo, fogy, fogey, fossil\ndoge\ndog_in_the_manger\ndogmatist, doctrinaire\ndolichocephalic\ndomestic_partner, significant_other, spousal_equivalent, spouse_equivalent\nDominican\ndominus, dominie, domine, dominee\ndon, father\nDonatist\ndonna\ndosser, street_person\ndouble, image, look-alike\ndouble-crosser, double-dealer, two-timer, betrayer, traitor\ndown-and-out\ndoyenne\ndraftsman, drawer\ndramatist, playwright\ndreamer\ndressmaker, modiste, needlewoman, seamstress, sempstress\ndressmaker's_model\ndribbler, driveller, slobberer, drooler\ndribbler\ndrinker, imbiber, toper, juicer\ndrinker\ndrug_addict, junkie, junky\ndrug_user, substance_abuser, user\nDruid\ndrum_majorette, majorette\ndrummer\ndrunk\ndrunkard, drunk, rummy, sot, inebriate, wino\nDruze, Druse\ndry, prohibitionist\ndry_nurse\nduchess\nduke\nduffer\ndunker\nDutch_uncle\ndyspeptic\neager_beaver, busy_bee, live_wire, sharpie, sharpy\nearl\nearner, wage_earner\neavesdropper\neccentric, eccentric_person, flake, oddball, geek\neclectic, eclecticist\neconometrician, econometrist\neconomist, economic_expert\nectomorph\neditor, editor_in_chief\negocentric, egoist\negotist, egoist, swellhead\nejaculator\nelder\nelder_statesman\nelected_official\nelectrician, lineman, linesman\nelegist\nelocutionist\nemancipator, manumitter\nembryologist\nemeritus\nemigrant, emigre, emigree, outgoer\nemissary, envoy\nempress\nemployee\nemployer\nenchantress, witch\nenchantress, temptress, siren, Delilah, 
femme_fatale\nencyclopedist, encyclopaedist\nendomorph\nenemy, foe, foeman, opposition\nenergizer, energiser, vitalizer, vitaliser, animator\nend_man\nend_man, corner_man\nendorser, indorser\nenjoyer\nenlisted_woman\nenophile, oenophile\nentrant\nentrant\nentrepreneur, enterpriser\nenvoy, envoy_extraordinary, minister_plenipotentiary\nenzymologist\neparch\nepidemiologist\nepigone, epigon\nepileptic\nEpiscopalian\nequerry\nequerry\nerotic\nescapee\nescapist, dreamer, wishful_thinker\nEskimo, Esquimau, Inuit\nespionage_agent\nesthetician, aesthetician\netcher\nethnologist\nEtonian\netymologist\nevangelist, revivalist, gospeler, gospeller\nEvangelist\nevent_planner\nexaminer, inspector\nexaminer, tester, quizzer\nexarch\nexecutant\nexecutive_secretary\nexecutive_vice_president\nexecutrix\nexegete\nexhibitor, exhibitioner, shower\nexhibitionist, show-off\nexile, expatriate, expat\nexistentialist, existentialist_philosopher, existential_philosopher\nexorcist, exorciser\nex-spouse\nextern, medical_extern\nextremist\nextrovert, extravert\neyewitness\nfacilitator\nfairy_godmother\nfalangist, phalangist\nfalconer, hawker\nfalsifier\nfamiliar\nfan, buff, devotee, lover\nfanatic, fiend\nfancier, enthusiast\nfarm_boy\nfarmer, husbandman, granger, sodbuster\nfarmhand, fieldhand, field_hand, farm_worker\nfascist\nfascista\nfatalist, determinist, predestinarian, predestinationist\nfather, male_parent, begetter\nFather, Padre\nfather-figure\nfather-in-law\nFauntleroy, Little_Lord_Fauntleroy\nFauve, fauvist\nfavorite_son\nfeatherweight\nfederalist\nfellow_traveler, fellow_traveller\nfemale_aristocrat\nfemale_offspring\nfemale_child, girl, little_girl\nfence\nfiance, groom-to-be\nfielder, fieldsman\nfield_judge\nfighter_pilot\nfiler\nfilm_director, director\nfinder\nfire_chief, fire_marshal\nfire-eater, fire-swallower\nfire-eater, hothead\nfireman, firefighter, fire_fighter, fire-eater\nfire_marshall\nfire_walker\nfirst_baseman, first_sacker\nfirstborn, 
eldest\nfirst_lady\nfirst_lieutenant, 1st_lieutenant\nfirst_offender\nfirst_sergeant, sergeant_first_class\nfishmonger, fishwife\nflagellant\nflag_officer\nflak_catcher, flak, flack_catcher, flack\nflanker_back, flanker\nflapper\nflatmate\nflatterer, adulator\nflibbertigibbet, foolish_woman\nflight_surgeon\nfloorwalker, shopwalker\nflop, dud, washout\nFlorentine\nflower_girl\nflower_girl\nflutist, flautist, flute_player\nfly-by-night\nflyweight\nflyweight\nfoe, enemy\nfolk_dancer\nfolk_poet\nfollower\nfootball_hero\nfootball_player, footballer\nfootman\nforefather, father, sire\nforemother\nforeign_agent\nforeigner, outsider\nboss\nforeman\nforester, tree_farmer, arboriculturist\nforewoman\nforger, counterfeiter\nforward\nfoster-brother, foster_brother\nfoster-father, foster_father\nfoster-mother, foster_mother\nfoster-sister, foster_sister\nfoster-son, foster_son\nfounder, beginner, founding_father, father\nfoundress\nfour-minute_man\nframer\nFrancophobe\nfreak, monster, monstrosity, lusus_naturae\nfree_agent, free_spirit, freewheeler\nfree_agent\nfreedom_rider\nfree-liver\nfreeloader\nfree_trader\nFreudian\nfriar, mendicant\nmonk, monastic\nfrontierswoman\nfront_man, front, figurehead, nominal_head, straw_man, strawman\nfrotteur\nfucker\nfucker\nfuddy-duddy\nfullback\nfunambulist, tightrope_walker\nfundamentalist\nfundraiser\nfuturist\ngadgeteer\ngagman, gagster, gagwriter\ngagman, standup_comedian\ngainer, weight_gainer\ngal\ngaloot\ngambist\ngambler\ngamine\ngarbage_man, garbageman, garbage_collector, garbage_carter, garbage_hauler, refuse_collector, dustman\ngardener\ngarment_cutter\ngarroter, garrotter, strangler, throttler, choker\ngasman\ngastroenterologist\ngatherer\ngawker\ngendarme\ngeneral, full_general\ngenerator, source, author\ngeneticist\ngenitor\ngent\ngeologist\ngeophysicist\nghostwriter, ghost\nGibson_girl\ngirl, miss, missy, young_lady, young_woman, fille\ngirlfriend, girl, lady_friend\ngirlfriend\ngirl_wonder\nGirondist, 
Girondin\ngitano\ngladiator\nglassblower\ngleaner\ngoat_herder, goatherd\ngodchild\ngodfather\ngodparent\ngodson\ngofer\ngoffer, gopher\ngoldsmith, goldworker, gold-worker\ngolfer, golf_player, linksman\ngondolier, gondoliere\ngood_guy\ngood_old_boy, good_ole_boy, good_ol'_boy\ngood_Samaritan\ngossip_columnist\ngouger\ngovernor_general\ngrabber\ngrader\ngraduate_nurse, trained_nurse\ngrammarian, syntactician\ngranddaughter\ngrande_dame\ngrandfather, gramps, granddad, grandad, granddaddy, grandpa\nGrand_Inquisitor\ngrandma, grandmother, granny, grannie, gran, nan, nanna\ngrandmaster\ngrandparent\ngrantee\ngranter\ngrass_widower, divorced_man\ngreat-aunt, grandaunt\ngreat_grandchild\ngreat_granddaughter\ngreat_grandmother\ngreat_grandparent\ngreat_grandson\ngreat-nephew, grandnephew\ngreat-niece, grandniece\nGreen_Beret\ngrenadier, grenade_thrower\ngreeter, saluter, welcomer\ngringo\ngrinner\ngrocer\ngroom, bridegroom\ngroom, bridegroom\ngrouch, grump, crank, churl, crosspatch\ngroup_captain\ngrunter\nprison_guard, jailer, jailor, gaoler, screw, turnkey\nguard\nguesser\nguest, invitee\nguest\nguest_of_honor\nguest_worker, guestworker\nguide\nguitarist, guitar_player\ngunnery_sergeant\nguru\nguru\nguvnor\nguy, cat, hombre, bozo\ngymnast\ngym_rat\ngynecologist, gynaecologist, woman's_doctor\nGypsy, Gipsy, Romany, Rommany, Romani, Roma, Bohemian\nhack, drudge, hacker\nhacker, cyber-terrorist, cyberpunk\nhaggler\nhairdresser, hairstylist, stylist, styler\nhakim, hakeem\nHakka\nhalberdier\nhalfback\nhalf_blood\nhand\nanimal_trainer, handler\nhandyman, jack_of_all_trades, odd-job_man\nhang_glider\nhardliner\nharlequin\nharmonizer, harmoniser\nhash_head\nhatchet_man, iceman\nhater\nhatmaker, hatter, milliner, modiste\nheadman, tribal_chief, chieftain, chief\nheadmaster, schoolmaster, master\nhead_nurse\nhearer, listener, auditor, attender\nheartbreaker\nheathen, pagan, gentile, infidel\nheavyweight\nheavy\nheckler, badgerer\nhedger\nhedger, equivocator, 
tergiversator\nhedonist, pagan, pleasure_seeker\nheir, inheritor, heritor\nheir_apparent\nheiress, inheritress, inheritrix\nheir_presumptive\nhellion, heller, devil\nhelmsman, steersman, steerer\nhire\nhematologist, haematologist\nhemiplegic\nherald, trumpeter\nherbalist, herb_doctor\nherder, herdsman, drover\nhermaphrodite, intersex, gynandromorph, androgyne, epicene, epicene_person\nheroine\nheroin_addict\nhero_worshiper, hero_worshipper\nHerr\nhighbinder\nhighbrow\nhigh_commissioner\nhighflier, highflyer\nHighlander, Scottish_Highlander, Highland_Scot\nhigh-muck-a-muck, pooh-bah\nhigh_priest\nhighjacker, hijacker\nhireling, pensionary\nhistorian, historiographer\nhitchhiker\nhitter, striker\nhobbyist\nholdout\nholdover, hangover\nholdup_man, stickup_man\nhomeboy\nhomeboy\nhome_buyer\nhomegirl\nhomeless, homeless_person\nhomeopath, homoeopath\nhonest_woman\nhonor_guard, guard_of_honor\nhooker\nhoper\nhornist\nhorseman, equestrian, horseback_rider\nhorse_trader\nhorsewoman\nhorse_wrangler, wrangler\nhorticulturist, plantsman\nhospital_chaplain\nhost, innkeeper, boniface\nhost\nhostess\nhotelier, hotelkeeper, hotel_manager, hotelman, hosteller\nhousekeeper\nhousemaster\nhousemate\nhouse_physician, resident, resident_physician\nhouse_sitter\nhousing_commissioner\nhuckster, cheap-jack\nhugger\nhumanist, humanitarian\nhumanitarian, do-gooder, improver\nhunk\nhuntress\nex-husband, ex\nhydrologist\nhyperope\nhypertensive\nhypnotist, hypnotizer, hypnotiser, mesmerist, mesmerizer\nhypocrite, dissembler, dissimulator, phony, phoney, pretender\niceman\niconoclast\nideologist, ideologue\nidol, matinee_idol\nidolizer, idoliser\nimam, imaum\nimperialist\nimportant_person, influential_person, personage\ninamorato\nincumbent, officeholder\nincurable\ninductee\nindustrialist\ninfanticide\ninferior\ninfernal\ninfielder\ninfiltrator\ninformer, betrayer, rat, squealer, blabber\ningenue\ningenue\npolymath\nin-law, 
relative-in-law\ninquiry_agent\ninspector\ninspector_general\ninstigator, initiator\ninsurance_broker, insurance_agent, general_agent, underwriter\ninsurgent, insurrectionist, freedom_fighter, rebel\nintelligence_analyst\ninterior_designer, designer, interior_decorator, house_decorator, room_decorator, decorator\ninterlocutor, conversational_partner\ninterlocutor, middleman\nInternational_Grandmaster\ninternationalist\ninternist\ninterpreter, translator\ninterpreter\nintervenor\nintrovert\ninvader, encroacher\ninvalidator, voider, nullifier\ninvestigator\ninvestor\ninvigilator\nirreligionist\nIvy_Leaguer\nJack_of_all_trades\nJacksonian\nJane_Doe\njanissary\nJat\nJavanese, Javan\nJekyll_and_Hyde\njester, fool, motley_fool\nJesuit\njezebel\njilt\njobber, middleman, wholesaler\njob_candidate\nJob's_comforter\njockey\nJohn_Doe\njournalist\njudge, justice, jurist\njudge_advocate\njuggler\nJungian\njunior\njunior\nJunior, Jr, Jnr\njunior_lightweight\njunior_middleweight\njurist, legal_expert\njuror, juryman, jurywoman\njustice_of_the_peace\njusticiar, justiciary\nkachina\nkeyboardist\nKhedive\nkingmaker\nking, queen, world-beater\nKing's_Counsel\nCounsel_to_the_Crown\nkin, kinsperson, family\nenate, matrikin, matrilineal_kin, matrisib, matrilineal_sib\nkink\nkinswoman\nkisser, osculator\nkitchen_help\nkitchen_police, KP\nKlansman, Ku_Kluxer, Kluxer\nkleptomaniac\nkneeler\nknight\nknocker\nknower, apprehender\nknow-it-all, know-all\nkolkhoznik\nKshatriya\nlabor_coach, birthing_coach, doula, monitrice\nlaborer, manual_laborer, labourer, jack\nLabourite\nlady\nlady-in-waiting\nlady's_maid\nlama\nlamb, dear\nlame_duck\nlamplighter\nland_agent\nlandgrave\nlandlubber, lubber, landsman\nlandlubber, landsman, landman\nlandowner, landholder, property_owner\nlandscape_architect, landscape_gardener, landscaper, landscapist\nlanglaufer\nlanguisher\nlapidary, lapidarist\nlass, lassie, young_girl, jeune_fille\nLatin\nLatin\nlatitudinarian\nJehovah's_Witness\nlaw_agent\nlawgiver, 
lawmaker\nlawman, law_officer, peace_officer\nlaw_student\nlawyer, attorney\nlay_reader\nlazybones\nleaker\nleaseholder, lessee\nlector, lecturer, reader\nlector, reader\nlecturer\nleft-hander, lefty, southpaw\nlegal_representative\nlegate, official_emissary\nlegatee\nlegionnaire, legionary\nletterman\nliberator\nlicenser\nlicentiate\nlieutenant\nlieutenant_colonel, light_colonel\nlieutenant_commander\nlieutenant_junior_grade, lieutenant_JG\nlife\nlifeguard, lifesaver\nlife_tenant\nlight_flyweight\nlight_heavyweight, cruiserweight\nlight_heavyweight\nlight-o'-love, light-of-love\nlightweight\nlightweight\nlightweight\nlilliputian\nlimnologist\nlineman\nline_officer\nlion-hunter\nlisper\nlister\nliterary_critic\nliterate, literate_person\nlitigant, litigator\nlitterer, litterbug, litter_lout\nlittle_brother\nlittle_sister\nlobbyist\nlocksmith\nlocum_tenens, locum\nLord, noble, nobleman\nloser\nloser, also-ran\nfailure, loser, nonstarter, unsuccessful_person\nLothario\nloudmouth, blusterer\nlowerclassman, underclassman\nLowlander, Scottish_Lowlander, Lowland_Scot\nloyalist, stalwart\nLuddite\nlumberman, lumberjack, logger, feller, faller\nlumper\nbedlamite\npyromaniac\nlutist, lutanist, lutenist\nLutheran\nlyricist, lyrist\nmacebearer, mace, macer\nmachinist, mechanic, shop_mechanic\nmadame\nmaenad\nmaestro, master\nmagdalen\nmagician, prestidigitator, conjurer, conjuror, illusionist\nmagus\nmaharani, maharanee\nmahatma\nmaid, maiden\nmaid, maidservant, housemaid, amah\nmajor\nmajor\nmajor-domo, seneschal\nmaker, shaper\nmalahini\nmalcontent\nmalik\nmalingerer, skulker, shammer\nMalthusian\nadonis\nman\nman\nmanageress\nmandarin\nmaneuverer, manoeuvrer\nmaniac\nManichaean, Manichean, Manichee\nmanicurist\nmanipulator\nman-at-arms\nman_of_action, man_of_deeds\nman_of_letters\nmanufacturer, producer\nmarcher, parader\nmarchioness, marquise\nmargrave\nmargrave\nMarine, devil_dog, leatherneck, shipboard_soldier\nmarquess\nmarquis, marquess\nmarshal, marshall\nmartinet, 
disciplinarian, moralist\nmascot\nmasochist\nmason, stonemason\nmasquerader, masker, masquer\nmasseur\nmasseuse\nmaster\nmaster, captain, sea_captain, skipper\nmaster-at-arms\nmaster_of_ceremonies, emcee, host\nmasturbator, onanist\nmatchmaker, matcher, marriage_broker\nmate, first_mate\nmate\nmate\nmater\nmaterial\nmaterialist\nmatriarch, materfamilias\nmatriarch\nmatriculate\nmatron\nmayor, city_manager\nmayoress\nmechanical_engineer\nmedalist, medallist, medal_winner\nmedical_officer, medic\nmedical_practitioner, medical_man\nmedical_scientist\nmedium, spiritualist, sensitive\nmegalomaniac\nmelancholic, melancholiac\nMelkite, Melchite\nmelter\nnonmember\nboard_member\nclansman, clanswoman, clan_member\nmemorizer, memoriser\nMendelian\nmender, repairer, fixer\nMesoamerican\nmessmate\nmestiza\nmeteorologist\nmeter_maid\nMethodist\nMetis\nmetropolitan\nmezzo-soprano, mezzo\nmicroeconomist, microeconomic_expert\nmiddle-aged_man\nmiddlebrow\nmiddleweight\nmidwife, accoucheuse\nmikado, tenno\nMilanese\nmiler\nmiles_gloriosus\nmilitary_attache\nmilitary_chaplain, padre, Holy_Joe, sky_pilot\nmilitary_leader\nmilitary_officer, officer\nmilitary_policeman, MP\nmill_agent\nmill-hand, factory_worker\nmillionairess\nmillwright\nminder\nmining_engineer\nminister, government_minister\nministrant\nminor_leaguer, bush_leaguer\nMinuteman\nmisanthrope, misanthropist\nmisfit\nmistress\nmistress, kept_woman, fancy_woman\nmixed-blood\nmodel, poser\nclass_act\nmodeler, modeller\nmodifier\nmolecular_biologist\nMonegasque, Monacan\nmonetarist\nmoneygrubber\nmoneymaker\nMongoloid\nmonolingual\nmonologist\nmoonlighter\nmoralist\nmorosoph\nmorris_dancer\nmortal_enemy\nmortgagee, mortgage_holder\nmortician, undertaker, funeral_undertaker, funeral_director\nmoss-trooper\nmother, female_parent\nmother\nmother\nmother_figure\nmother_hen\nmother-in-law\nmother's_boy, mamma's_boy, mama's_boy\nmother's_daughter\nmotorcycle_cop, motorcycle_policeman, 
speed_cop\nmotorcyclist\nMound_Builder\nmountebank, charlatan\nmourner, griever, sorrower, lamenter\nmouthpiece, mouth\nmover\nmoviegoer, motion-picture_fan\nmuffin_man\nmugwump, independent, fencesitter\nMullah, Mollah, Mulla\nmuncher\nmurderess\nmurder_suspect\nmusher\nmusician, instrumentalist, player\nmusicologist\nmusic_teacher\nmusketeer\nMuslimah\nmutilator, maimer, mangler\nmutineer\nmute, deaf-mute, deaf-and-dumb_person\nmutterer, mumbler, murmurer\nmuzzler\nMycenaen\nmycologist\nmyope\nmyrmidon\nmystic, religious_mystic\nmythologist\nnaif\nnailer\nnamby-pamby\nname_dropper\nnamer\nnan\nnanny, nursemaid, nurse\nnarc, nark, narcotics_agent\nnarcissist, narcist\nnark, copper's_nark\nnationalist\nnautch_girl\nnaval_commander\nNavy_SEAL, SEAL\nobstructionist, obstructor, obstructer, resister, thwarter\nNazarene\nNazarene, Ebionite\nNazi, German_Nazi\nnebbish, nebbech\nnecker\nneonate, newborn, newborn_infant, newborn_baby\nnephew\nneurobiologist\nneurologist, brain_doctor\nneurosurgeon, brain_surgeon\nneutral\nneutralist\nnewcomer, fledgling, fledgeling, starter, neophyte, freshman, newbie, entrant\nnewcomer\nNew_Dealer\nnewspaper_editor\nnewsreader, news_reader\nNewtonian\nniece\nniggard, skinflint, scrooge, churl\nnight_porter\nnight_rider, nightrider\nNIMBY\nniqaabi\nnitpicker\nNobelist, Nobel_Laureate\nNOC\nnoncandidate\nnoncommissioned_officer, noncom, enlisted_officer\nnondescript\nnondriver\nnonparticipant\nnonperson, unperson\nnonresident\nnonsmoker\nNorthern_Baptist\nnoticer\nnovelist\nnovitiate, novice\nnuclear_chemist, radiochemist\nnudger\nnullipara\nnumber_theorist\nnurse\nnursling, nurseling, suckling\nnymph, houri\nnymphet\nnympholept\nnymphomaniac, nympho\noarswoman\noboist\nobscurantist\nobserver, commentator\nobstetrician, accoucheur\noccupier\noccultist\nwine_lover\nofferer, offeror\noffice-bearer\noffice_boy\nofficeholder, officer\nofficiant\nFederal, Fed, 
federal_official\noilman\noil_tycoon\nold-age_pensioner\nold_boy\nold_lady\nold_man\noldster, old_person, senior_citizen, golden_ager\nold-timer, oldtimer, gaffer, old_geezer, antique\nold_woman\noligarch\nOlympian\nomnivore\noncologist\nonlooker, looker-on\nonomancer\noperator\nopportunist, self-seeker\noptimist\nOrangeman\norator, speechmaker, rhetorician, public_speaker, speechifier\norderly, hospital_attendant\norderly\norderly_sergeant\nordinand\nordinary\norgan-grinder\norganist\norganization_man\norganizer, organiser, arranger\norganizer, organiser, labor_organizer\noriginator, conceiver, mastermind\nornithologist, bird_watcher\norphan\norphan\nosteopath, osteopathist\nout-and-outer\noutdoorswoman\noutfielder\noutfielder\nright_fielder\nright-handed_pitcher, right-hander\noutlier\nowner-occupier\noyabun\npackrat\npadrone\npadrone\npage, pageboy\npainter\nPaleo-American, Paleo-Amerind, Paleo-Indian\npaleontologist, palaeontologist, fossilist\npallbearer, bearer\npalmist, palmister, chiromancer\npamperer, spoiler, coddler, mollycoddler\nPanchen_Lama\npanelist, panellist\npanhandler\npaparazzo\npaperboy\npaperhanger, paperer\npaperhanger\npapoose, pappoose\npardoner\nparetic\nparishioner\npark_commissioner\nParliamentarian, Member_of_Parliament\nparliamentary_agent\nparodist, lampooner\nparricide\nparrot\npartaker, sharer\npart-timer\nparty\nparty_man, party_liner\npassenger, rider\npasser\npaster\npater\npatient\npatriarch\npatriarch\npatriarch, paterfamilias\npatriot, nationalist\npatron, sponsor, supporter\npatternmaker\npawnbroker\npayer, remunerator\npeacekeeper\npeasant\npedant, bookworm, scholastic\npeddler, pedlar, packman, hawker, pitchman\npederast, paederast, child_molester\npenologist\npentathlete\nPentecostal, Pentecostalist\npercussionist\nperiodontist\npeshmerga\npersonality\npersonal_representative\npersonage\npersona_grata\npersona_non_grata\npersonification\nperspirer, sweater\npervert, deviant, deviate, degenerate\npessimist\npest, blighter, 
cuss, pesterer, gadfly\nPeter_Pan\npetitioner, suppliant, supplicant, requester\npetit_juror, petty_juror\npet_sitter, critter_sitter\npetter, fondler\nPharaoh, Pharaoh_of_Egypt\npharmacist, druggist, chemist, apothecary, pill_pusher, pill_roller\nphilanthropist, altruist\nphilatelist, stamp_collector\nphilosopher\nphonetician\nphonologist\nphotojournalist\nphotometrist, photometrician\nphysical_therapist, physiotherapist\nphysicist\npiano_maker\npicker, chooser, selector\npicnicker, picknicker\npilgrim\npill\npillar, mainstay\npill_head\npilot\nPiltdown_man, Piltdown_hoax\npimp, procurer, panderer, pander, pandar, fancy_man, ponce\npipe_smoker\npip-squeak, squirt, small_fry\npisser, urinator\npitcher, hurler, twirler\npitchman\nplaceman, placeseeker\nplacer_miner\nplagiarist, plagiarizer, plagiariser, literary_pirate, pirate\nplainsman\nplanner, contriver, deviser\nplanter, plantation_owner\nplasterer\nplatinum_blond, platinum_blonde\nplatitudinarian\nplayboy, man-about-town, Corinthian\nplayer, participant\nplaymate, playfellow\npleaser\npledger\nplenipotentiary\nplier, plyer\nplodder, slowpoke, stick-in-the-mud, slowcoach\nplodder, slogger\nplotter, mapper\nplumber, pipe_fitter\npluralist\npluralist\npoet\npointsman\npoint_woman\npolicyholder\npolitical_prisoner\npolitical_scientist\npolitician, politico, pol, political_leader\npolitician\npollster, poll_taker, headcounter, canvasser\npolluter, defiler\npool_player\nportraitist, portrait_painter, portrayer, limner\nposeuse\npositivist, rationalist\npostdoc, post_doc\nposter_girl\npostulator\nprivate_citizen\nproblem_solver, solver, convergent_thinker\npro-lifer\nprosthetist\npostulant\npotboy, potman\npoultryman, poulterer\npower_user\npower_worker, power-station_worker\npractitioner, practician\nprayer, supplicant\npreceptor, don\npredecessor\npreemptor, pre-emptor\npreemptor, pre-emptor\npremature_baby, preterm_baby, premature_infant, preterm_infant, preemie, premie\npresbyter\npresenter, 
sponsor\npresentist\npreserver\npresident\nPresident_of_the_United_States, United_States_President, President, Chief_Executive\npresident, prexy\npress_agent, publicity_man, public_relations_man, PR_man\npress_photographer\npriest\nprima_ballerina\nprima_donna, diva\nprima_donna\nprimigravida, gravida_I\nprimordial_dwarf, hypoplastic_dwarf, true_dwarf, normal_dwarf\nprince_charming\nprince_consort\nprinceling\nPrince_of_Wales\nprincess\nprincess_royal\nprincipal, dealer\nprincipal, school_principal, head_teacher, head\nprint_seller\nprior\nprivate, buck_private, common_soldier\nprobationer, student_nurse\nprocessor\nprocess-server\nproconsul\nproconsul\nproctologist\nproctor, monitor\nprocurator\nprocurer, securer\nprofit_taker\nprogrammer, computer_programmer, coder, software_engineer\npromiser, promisor\npromoter, booster, plugger\npromulgator\npropagandist\npropagator, disseminator\nproperty_man, propman, property_master\nprophetess\nprophet\nprosecutor, public_prosecutor, prosecuting_officer, prosecuting_attorney\nprospector\nprotectionist\nprotegee\nprotozoologist\nprovost_marshal\npruner, trimmer\npsalmist\npsephologist\npsychiatrist, head-shrinker, shrink\npsychic\npsycholinguist\npsychophysicist\npublican, tavern_keeper\npudge\npuerpera\npunching_bag\npunter\npunter\npuppeteer\npuppy, pup\npurchasing_agent\npuritan\nPuritan\npursuer\npusher, shover\npusher, drug_peddler, peddler, drug_dealer, drug_trafficker\npusher, thruster\nputz\nPygmy, Pigmy\nqadi\nquadriplegic\nquadruplet, quad\nquaker, trembler\nquarter\nquarterback, signal_caller, field_general\nquartermaster\nquartermaster_general\nQuebecois\nqueen, queen_regnant, female_monarch\nQueen_of_England\nqueen\nqueen\nqueen_consort\nqueen_mother\nQueen's_Counsel\nquestion_master, quizmaster\nquick_study, sponge\nquietist\nquitter\nrabbi\nracist, racialist\nradiobiologist\nradiologic_technologist\nradiologist, radiotherapist\nrainmaker\nraiser\nraja, rajah\nrake, rakehell, profligate, rip, blood, 
roue\nramrod\nranch_hand\nranker\nranter, raver\nrape_suspect\nrapper\nrapporteur\nrare_bird, rara_avis\nratepayer\nraw_recruit\nreader\nreading_teacher\nrealist\nreal_estate_broker, real_estate_agent, estate_agent, land_agent, house_agent\nrear_admiral\nreceiver\nreciter\nrecruit, enlistee\nrecruit, military_recruit\nrecruiter\nrecruiting-sergeant\nredcap\nredhead, redheader, red-header, carrottop\nredneck, cracker\nreeler\nreenactor\nreferral\nreferee, ref\nrefiner\nReform_Jew\nregistered_nurse, RN\nregistrar\nRegius_professor\nreliever, allayer, comforter\nanchorite, hermit\nreligious_leader\nremover\nRenaissance_man, generalist\nrenegade\nrentier\nrepairman, maintenance_man, service_man\nreporter, newsman, newsperson\nnewswoman\nrepresentative\nreprobate, miscreant\nrescuer, recoverer, saver\nreservist\nresident_commissioner\nrespecter\nrestaurateur, restauranter\nrestrainer, controller\nretailer, retail_merchant\nretiree, retired_person\nreturning_officer\nrevenant\nrevisionist\nrevolutionist, revolutionary, subversive, subverter\nrheumatologist\nRhodesian_man, Homo_rhodesiensis\nrhymer, rhymester, versifier, poetizer, poetiser\nrich_person, wealthy_person, have\nrider\nriding_master\nrifleman\nright-hander, right_hander, righthander\nright-hand_man, chief_assistant, man_Friday\nringer\nringleader\nroadman, road_mender\nroarer, bawler, bellower, screamer, screecher, shouter, yeller\nrocket_engineer, rocket_scientist\nrocket_scientist\nrock_star\nRomanov, Romanoff\nromanticist, romantic\nropemaker, rope-maker, roper\nroper\nroper\nropewalker, ropedancer\nrosebud\nRosicrucian\nMountie\nRough_Rider\nroundhead\ncivil_authority, civil_officer\nrunner\nrunner\nrunner\nrunning_back\nrusher\nrustic\nsaboteur, wrecker, diversionist\nsadist\nsailing_master, navigator\nsailor, crewman\nsalesgirl, saleswoman, saleslady\nsalesman\nsalesperson, sales_representative, sales_rep\nsalvager, salvor\nsandwichman\nsangoma\nsannup\nsapper\nSassenach\nsatrap\nsaunterer, stroller, 
ambler\nSavoyard\nsawyer\nscalper\nscandalmonger\nscapegrace, black_sheep\nscene_painter\nschemer, plotter\nschizophrenic\nschlemiel, shlemiel\nschlockmeister, shlockmeister\nscholar, scholarly_person, bookman, student\nscholiast\nschoolchild, school-age_child, pupil\nschoolfriend\nSchoolman, medieval_Schoolman\nschoolmaster\nschoolmate, classmate, schoolfellow, class_fellow\nscientist\nscion\nscoffer, flouter, mocker, jeerer\nscofflaw\nscorekeeper, scorer\nscorer\nscourer\nscout, talent_scout\nscoutmaster\nscrambler\nscratcher\nscreen_actor, movie_actor\nscrutineer, canvasser\nscuba_diver\nsculptor, sculpturer, carver, statue_maker\nSea_Scout\nseasonal_worker, seasonal\nseasoner\nsecond_baseman, second_sacker\nsecond_cousin\nseconder\nsecond_fiddle, second_banana\nsecond-in-command\nsecond_lieutenant, 2nd_lieutenant\nsecond-rater, mediocrity\nsecretary\nSecretary_of_Agriculture, Agriculture_Secretary\nSecretary_of_Health_and_Human_Services\nSecretary_of_State\nSecretary_of_the_Interior, Interior_Secretary\nsectarian, sectary, sectarist\nsection_hand\nsecularist\nsecurity_consultant\nseeded_player, seed\nseeder, cloud_seeder\nseeker, searcher, quester\nsegregate\nsegregator, segregationist\nselectman\nselectwoman\nselfish_person\nself-starter\nseller, marketer, vender, vendor, trafficker\nselling_agent\nsemanticist, semiotician\nsemifinalist\nseminarian, seminarist\nsenator\nsendee\nsenior\nsenior_vice_president\nseparatist, separationist\nseptuagenarian\nserf, helot, villein\nspree_killer\nserjeant-at-law, serjeant, sergeant-at-law, sergeant\nserver\nserviceman, military_man, man, military_personnel\nsettler, colonist\nsettler\nsex_symbol\nsexton, sacristan\nshaheed\nShakespearian, Shakespearean\nshanghaier, seizer\nsharecropper, cropper, sharecrop_farmer\nshaver\nShavian\nsheep\nsheik, tribal_sheik, sheikh, tribal_sheikh, Arab_chief\nshelver\nshepherd\nship-breaker\nshipmate\nshipowner\nshipping_agent\nshirtmaker\nshogun\nshopaholic\nshop_girl\nshop_steward, 
steward\nshot_putter\nshrew, termagant\nshuffler\nshyster, pettifogger\nsibling, sib\nsick_person, diseased_person, sufferer\nsightreader\nsignaler, signaller\nsigner\nsignor, signior\nsignora\nsignore\nsignorina\nsilent_partner, sleeping_partner\naddle-head, addlehead, loon, birdbrain\nsimperer\nsinger, vocalist, vocalizer, vocaliser\nSinologist\nsipper\nsirrah\nSister\nsister, sis\nwaverer, vacillator, hesitator, hesitater\nsitar_player\nsixth-former\nskateboarder\nskeptic, sceptic, doubter\nsketcher\nskidder\nskier\nskinny-dipper\nskin-diver, aquanaut\nskinhead\nslasher\nslattern, slut, slovenly_woman, trollop\nsleeper, slumberer\nsleeper\nsleeping_beauty\nsleuth, sleuthhound\nslob, sloven, pig, slovenly_person\nsloganeer\nslopseller, slop-seller\nsmasher, stunner, knockout, beauty, ravisher, sweetheart, peach, lulu, looker, mantrap, dish\nsmirker\nsmith, metalworker\nsmoothie, smoothy, sweet_talker, charmer\nsmuggler, runner, contrabandist, moon_curser, moon-curser\nsneezer\nsnob, prig, snot, snoot\nsnoop, snooper\nsnorer\nsob_sister\nsoccer_player\nsocial_anthropologist, cultural_anthropologist\nsocial_climber, climber\nsocialist\nsocializer, socialiser\nsocial_scientist\nsocial_secretary\nSocinian\nsociolinguist\nsociologist\nsoda_jerk, soda_jerker\nsodalist\nsodomite, sodomist, sod, bugger\nsoldier\nson, boy\nsongster\nsongstress\nsongwriter, songster, ballad_maker\nsorcerer, magician, wizard, necromancer, thaumaturge, thaumaturgist\nsorehead\nsoul_mate\nSouthern_Baptist\nsovereign, crowned_head, monarch\nspacewalker\nSpanish_American, Hispanic_American, Hispanic\nsparring_partner, sparring_mate\nspastic\nspeaker, talker, utterer, verbalizer, verbaliser\nnative_speaker\nSpeaker\nspeechwriter\nspecialist, medical_specialist\nspecifier\nspectator, witness, viewer, watcher, looker\nspeech_therapist\nspeedskater, speed_skater\nspellbinder\nsphinx\nspinster, old_maid\nsplit_end\nsport, sportsman, sportswoman\nsport, summercater\nsporting_man, 
outdoor_man\nsports_announcer, sportscaster, sports_commentator\nsports_editor\nsprog\nsquare_dancer\nsquare_shooter, straight_shooter, straight_arrow\nsquatter\nsquire\nsquire\nstaff_member, staffer\nstaff_sergeant\nstage_director\nstainer\nstakeholder\nstalker\nstalking-horse\nstammerer, stutterer\nstamper, stomper, tramper, trampler\nstandee\nstand-in, substitute, relief, reliever, backup, backup_man, fill-in\nstar, principal, lead\nstarlet\nstarter, dispatcher\nstatesman, solon, national_leader\nstate_treasurer\nstationer, stationery_seller\nstenographer, amanuensis, shorthand_typist\nstentor\nstepbrother, half-brother, half_brother\nstepmother\nstepparent\nstevedore, loader, longshoreman, docker, dockhand, dock_worker, dockworker, dock-walloper, lumper\nsteward\nsteward, flight_attendant\nsteward\nstickler\nstiff\nstifler, smotherer\nstipendiary, stipendiary_magistrate\nstitcher\nstockjobber\nstock_trader\nstockist\nstoker, fireman\nstooper\nstore_detective\nstrafer\nstraight_man, second_banana\nstranger, alien, unknown\nstranger\nstrategist, strategian\nstraw_boss, assistant_foreman\nstreetwalker, street_girl, hooker, hustler, floozy, floozie, slattern\nstretcher-bearer, litter-bearer\nstruggler\nstud, he-man, macho-man\nstudent, pupil, educatee\nstumblebum, palooka\nstylist\nsubaltern\nsubcontractor\nsubduer, surmounter, overcomer\nsubject, case, guinea_pig\nsubordinate, subsidiary, underling, foot_soldier\nsubstitute, reserve, second-stringer\nsuccessor, heir\nsuccessor, replacement\nsuccorer, succourer\nSufi\nsuffragan, suffragan_bishop\nsuffragette\nsugar_daddy\nsuicide_bomber\nsuitor, suer, wooer\nsumo_wrestler\nsunbather\nsundowner\nsuper_heavyweight\nsuperior, higher-up, superordinate\nsupermom\nsupernumerary, spear_carrier, extra\nsupremo\nsurgeon, operating_surgeon, sawbones\nSurgeon_General\nSurgeon_General\nsurpriser\nsurveyor\nsurveyor\nsurvivor, subsister\nsutler, victualer, victualler, provisioner\nsweeper\nsweetheart, sweetie, steady, 
truelove\nswinger, tramp\nswitcher, whipper\nswot, grind, nerd, wonk, dweeb\nsycophant, toady, crawler, lackey, ass-kisser\nsylph\nsympathizer, sympathiser, well-wisher\nsymphonist\nsyncopator\nsyndic\ntactician\ntagger\ntailback\ntallyman, tally_clerk\ntallyman\ntanker, tank_driver\ntapper, wiretapper, phone_tapper\nTartuffe, Tartufe\nTarzan\ntaster, taste_tester, taste-tester, sampler\ntax_assessor, assessor\ntaxer\ntaxi_dancer\ntaxonomist, taxonomer, systematist\nteacher, instructor\nteaching_fellow\ntearaway\ntechnical_sergeant\ntechnician\nTed, Teddy_boy\nteetotaler, teetotaller, teetotalist\ntelevision_reporter, television_newscaster, TV_reporter, TV_newsman\ntemporizer, temporiser\ntempter\nterm_infant\ntoiler\ntenant, renter\ntenant\ntenderfoot\ntennis_player\ntennis_pro, professional_tennis_player\ntenor_saxophonist, tenorist\ntermer\nterror, scourge, threat\ntertigravida, gravida_III\ntestator, testate\ntestatrix\ntestee, examinee\ntest-tube_baby\nTexas_Ranger, Ranger\nthane\ntheatrical_producer\ntheologian, theologist, theologizer, theologiser\ntheorist, theoretician, theorizer, theoriser, idealogue\ntheosophist\ntherapist, healer\nThessalonian\nthinker, creative_thinker, mind\nthinker\nthrower\nthurifer\nticket_collector, ticket_taker\ntight_end\ntiler\ntimekeeper, timer\nTimorese\ntinkerer, fiddler\ntinsmith, tinner\ntinter\ntippler, social_drinker\ntipster, tout\nT-man\ntoastmaster, symposiarch\ntoast_mistress\ntobogganist\ntomboy, romp, hoyden\ntoolmaker\ntorchbearer\nTory\nTory\ntosser\ntosser, jerk-off, wanker\ntotalitarian\ntourist, tourer, holidaymaker\ntout, touter\ntout, ticket_tout\ntovarich, tovarisch\ntowhead\ntown_clerk\ntown_crier, crier\ntownsman, towner\ntoxicologist\ntrack_star\ntrader, bargainer, dealer, monger\ntrade_unionist, unionist, union_member\ntraditionalist, diehard\ntraffic_cop\ntragedian\ntragedian\ntragedienne\ntrail_boss\ntrainer\ntraitor, treasonist\ntraitress\ntransactor\ntranscriber\ntransfer, 
transferee\ntransferee\ntranslator, transcriber\ntransvestite, cross-dresser\ntraveling_salesman, travelling_salesman, commercial_traveler, commercial_traveller, roadman, bagman\ntraverser\ntrawler\nTreasury, First_Lord_of_the_Treasury\ntrencher\ntrend-setter, taste-maker, fashion_arbiter\ntribesman\ntrier, attempter, essayer\ntrifler\ntrooper\ntrooper, state_trooper\nTrotskyite, Trotskyist, Trot\ntruant, hooky_player\ntrumpeter, cornetist\ntrusty\nTudor\ntumbler\ntutee\ntwin\ntwo-timer\nTyke\ntympanist, timpanist\ntypist\ntyrant, autocrat, despot\numpire, ump\nunderstudy, standby\nundesirable\nunicyclist\nunilateralist\nUnitarian\nArminian\nuniversal_donor\nUNIX_guru\nUnknown_Soldier\nupsetter\nupstager\nupstart, parvenu, nouveau-riche, arriviste\nupstart\nurchin\nurologist\nusherette\nusher, doorkeeper\nusurper, supplanter\nutility_man\nutilizer, utiliser\nUtopian\nuxoricide\nvacationer, vacationist\nvaledictorian, valedictory_speaker\nvalley_girl\nvaulter, pole_vaulter, pole_jumper\nvegetarian\nvegan\nvenerator\nventure_capitalist\nventurer, merchant-venturer\nvermin, varmint\nvery_important_person, VIP, high-up, dignitary, panjandrum, high_muckamuck\nvibist, vibraphonist\nvicar\nvicar\nvicar-general\nvice_chancellor\nvicegerent\nvice_president, V.P.\nvice-regent\nvictim, dupe\nVictorian\nvictualer, victualler\nvigilante, vigilance_man\nvillager\nvintager\nvintner, wine_merchant\nviolator, debaucher, ravisher\nviolator, lawbreaker, law_offender\nviolist\nvirago\nvirologist\nVisayan, Bisayan\nviscountess\nviscount\nVisigoth\nvisionary\nvisiting_fireman\nvisiting_professor\nvisualizer, visualiser\nvixen, harpy, hellcat\nvizier\nvoicer\nvolunteer, unpaid_worker\nvolunteer, military_volunteer, voluntary\nvotary\nvotary\nvouchee\nvower\nvoyager\nvoyeur, Peeping_Tom, peeper\nvulcanizer, vulcaniser\nwaffler\nWagnerian\nwaif, street_child\nwailer\nwaiter, server\nwaitress\nwalking_delegate\nwalk-on\nwallah\nwally\nwaltzer\nwanderer, roamer, rover, 
bird_of_passage\nWandering_Jew\nwanton\nwarrantee\nwarrantee\nwasher\nwasherman, laundryman\nwashwoman, washerwoman, laundrywoman, laundress\nwassailer, carouser\nwastrel, waster\nWave\nweatherman, weather_forecaster\nweekend_warrior\nweeder\nwelder\nwelfare_case, charity_case\nwesterner\nWest-sider\nwetter\nwhaler\nWhig\nwhiner, complainer, moaner, sniveller, crybaby, bellyacher, grumbler, squawker\nwhipper-in\nwhisperer\nwhiteface\nCarmelite, White_Friar\nAugustinian\nwhite_hope, great_white_hope\nwhite_supremacist\nwhoremaster, whoremonger\nwhoremaster, whoremonger, john, trick\nwidow, widow_woman\nwife, married_woman\nwiggler, wriggler, squirmer\nwimp, chicken, crybaby\nwing_commander\nwinger\nwinner\nwinner, victor\nwindow_dresser, window_trimmer\nwinker\nwiper\nwireman, wirer\nwise_guy, smart_aleck, wiseacre, wisenheimer, weisenheimer\nwitch_doctor\nwithdrawer\nwithdrawer\nwoman, adult_female\nwoman\nwonder_boy, golden_boy\nwonderer\nworking_girl\nworkman, workingman, working_man, working_person\nworkmate\nworldling\nworshiper, worshipper\nworthy\nwrecker\nwright\nwrite-in_candidate, write-in\nwriter, author\nWykehamist\nyakuza\nyard_bird, yardbird\nyardie\nyardman\nyardmaster, trainmaster, train_dispatcher\nyenta\nyogi\nyoung_buck, young_man\nyoung_Turk\nYoung_Turk\nZionist\nzoo_keeper\nGenet, Edmund_Charles_Edouard_Genet, Citizen_Genet\nKennan, George_F._Kennan, George_Frost_Kennan\nMunro, H._H._Munro, Hector_Hugh_Munro, Saki\nPopper, Karl_Popper, Sir_Karl_Raimund_Popper\nStoker, Bram_Stoker, Abraham_Stoker\nTownes, Charles_Townes, Charles_Hard_Townes\ndust_storm, duster, sandstorm, sirocco\nparhelion, mock_sun, sundog\nsnow, snowfall\nfacula\nwave\nmicroflora\nwilding\nsemi-climber\nvolva\nbasidiocarp\ndomatium\napomict\naquatic\nbryophyte, nonvascular_plant\nacrocarp, acrocarpous_moss\nsphagnum, sphagnum_moss, peat_moss, bog_moss\nliverwort, hepatic\nhepatica, Marchantia_polymorpha\npecopteris\npteridophyte, 
nonflowering_plant\nfern\nfern_ally\nspore\ncarpospore\nchlamydospore\nconidium, conidiospore\noospore\ntetraspore\nzoospore\ncryptogam\nspermatophyte, phanerogam, seed_plant\nseedling\nannual\nbiennial\nperennial\nhygrophyte\ngymnosperm\ngnetum, Gnetum_gnemon\nCatha_edulis\nephedra, joint_fir\nmahuang, Ephedra_sinica\nwelwitschia, Welwitschia_mirabilis\ncycad\nsago_palm, Cycas_revoluta\nfalse_sago, fern_palm, Cycas_circinalis\nzamia\ncoontie, Florida_arrowroot, Seminole_bread, Zamia_pumila\nceratozamia\ndioon\nencephalartos\nkaffir_bread, Encephalartos_caffer\nmacrozamia\nburrawong, Macrozamia_communis, Macrozamia_spiralis\npine, pine_tree, true_pine\npinon, pinyon\nnut_pine\npinon_pine, Mexican_nut_pine, Pinus_cembroides\nRocky_mountain_pinon, Pinus_edulis\nsingle-leaf, single-leaf_pine, single-leaf_pinyon, Pinus_monophylla\nbishop_pine, bishop's_pine, Pinus_muricata\nCalifornia_single-leaf_pinyon, Pinus_californiarum\nParry's_pinyon, Pinus_quadrifolia, Pinus_parryana\nspruce_pine, Pinus_glabra\nblack_pine, Pinus_nigra\npitch_pine, northern_pitch_pine, Pinus_rigida\npond_pine, Pinus_serotina\nstone_pine, umbrella_pine, European_nut_pine, Pinus_pinea\nSwiss_pine, Swiss_stone_pine, arolla_pine, cembra_nut_tree, Pinus_cembra\ncembra_nut, cedar_nut\nSwiss_mountain_pine, mountain_pine, dwarf_mountain_pine, mugho_pine, mugo_pine, Pinus_mugo\nancient_pine, Pinus_longaeva\nwhite_pine\nAmerican_white_pine, eastern_white_pine, weymouth_pine, Pinus_strobus\nwestern_white_pine, silver_pine, mountain_pine, Pinus_monticola\nsouthwestern_white_pine, Pinus_strobiformis\nlimber_pine, Pinus_flexilis\nwhitebark_pine, whitebarked_pine, Pinus_albicaulis\nyellow_pine\nponderosa, ponderosa_pine, western_yellow_pine, bull_pine, Pinus_ponderosa\nJeffrey_pine, Jeffrey's_pine, black_pine, Pinus_jeffreyi\nshore_pine, lodgepole, lodgepole_pine, spruce_pine, Pinus_contorta\nSierra_lodgepole_pine, Pinus_contorta_murrayana\nloblolly_pine, frankincense_pine, Pinus_taeda\njack_pine, 
Pinus_banksiana\nswamp_pine\nlongleaf_pine, pitch_pine, southern_yellow_pine, Georgia_pine, Pinus_palustris\nshortleaf_pine, short-leaf_pine, shortleaf_yellow_pine, Pinus_echinata\nred_pine, Canadian_red_pine, Pinus_resinosa\nScotch_pine, Scots_pine, Scotch_fir, Pinus_sylvestris\nscrub_pine, Virginia_pine, Jersey_pine, Pinus_virginiana\nMonterey_pine, Pinus_radiata\nbristlecone_pine, Rocky_Mountain_bristlecone_pine, Pinus_aristata\ntable-mountain_pine, prickly_pine, hickory_pine, Pinus_pungens\nknobcone_pine, Pinus_attenuata\nJapanese_red_pine, Japanese_table_pine, Pinus_densiflora\nJapanese_black_pine, black_pine, Pinus_thunbergii\nTorrey_pine, Torrey's_pine, soledad_pine, grey-leaf_pine, sabine_pine, Pinus_torreyana\nlarch, larch_tree\nAmerican_larch, tamarack, black_larch, Larix_laricina\nwestern_larch, western_tamarack, Oregon_larch, Larix_occidentalis\nsubalpine_larch, Larix_lyallii\nEuropean_larch, Larix_decidua\nSiberian_larch, Larix_siberica, Larix_russica\ngolden_larch, Pseudolarix_amabilis\nfir, fir_tree, true_fir\nsilver_fir\namabilis_fir, white_fir, Pacific_silver_fir, red_silver_fir, Christmas_tree, Abies_amabilis\nEuropean_silver_fir, Christmas_tree, Abies_alba\nwhite_fir, Colorado_fir, California_white_fir, Abies_concolor, Abies_lowiana\nbalsam_fir, balm_of_Gilead, Canada_balsam, Abies_balsamea\nFraser_fir, Abies_fraseri\nlowland_fir, lowland_white_fir, giant_fir, grand_fir, Abies_grandis\nAlpine_fir, subalpine_fir, Abies_lasiocarpa\nSanta_Lucia_fir, bristlecone_fir, Abies_bracteata, Abies_venusta\ncedar, cedar_tree, true_cedar\ncedar_of_Lebanon, Cedrus_libani\ndeodar, deodar_cedar, Himalayan_cedar, Cedrus_deodara\nAtlas_cedar, Cedrus_atlantica\nspruce\nNorway_spruce, Picea_abies\nweeping_spruce, Brewer's_spruce, Picea_breweriana\nEngelmann_spruce, Engelmann's_spruce, Picea_engelmannii\nwhite_spruce, Picea_glauca\nblack_spruce, Picea_mariana, spruce_pine\nSiberian_spruce, Picea_obovata\nSitka_spruce, Picea_sitchensis\noriental_spruce, 
Picea_orientalis\nColorado_spruce, Colorado_blue_spruce, silver_spruce, Picea_pungens\nred_spruce, eastern_spruce, yellow_spruce, Picea_rubens\nhemlock, hemlock_tree\neastern_hemlock, Canadian_hemlock, spruce_pine, Tsuga_canadensis\nCarolina_hemlock, Tsuga_caroliniana\nmountain_hemlock, black_hemlock, Tsuga_mertensiana\nwestern_hemlock, Pacific_hemlock, west_coast_hemlock, Tsuga_heterophylla\ndouglas_fir\ngreen_douglas_fir, douglas_spruce, douglas_pine, douglas_hemlock, Oregon_fir, Oregon_pine, Pseudotsuga_menziesii\nbig-cone_spruce, big-cone_douglas_fir, Pseudotsuga_macrocarpa\nCathaya\ncedar, cedar_tree\ncypress, cypress_tree\ngowen_cypress, Cupressus_goveniana\npygmy_cypress, Cupressus_pigmaea, Cupressus_goveniana_pigmaea\nSanta_Cruz_cypress, Cupressus_abramsiana, Cupressus_goveniana_abramsiana\nArizona_cypress, Cupressus_arizonica\nGuadalupe_cypress, Cupressus_guadalupensis\nMonterey_cypress, Cupressus_macrocarpa\nMexican_cypress, cedar_of_Goa, Portuguese_cypress, Cupressus_lusitanica\nItalian_cypress, Mediterranean_cypress, Cupressus_sempervirens\nKing_William_pine, Athrotaxis_selaginoides\nChilean_cedar, Austrocedrus_chilensis\nincense_cedar, red_cedar, Calocedrus_decurrens, Libocedrus_decurrens\nsouthern_white_cedar, coast_white_cedar, Atlantic_white_cedar, white_cypress, white_cedar, Chamaecyparis_thyoides\nOregon_cedar, Port_Orford_cedar, Lawson's_cypress, Lawson's_cedar, Chamaecyparis_lawsoniana\nyellow_cypress, yellow_cedar, Nootka_cypress, Alaska_cedar, Chamaecyparis_nootkatensis\nJapanese_cedar, Japan_cedar, sugi, Cryptomeria_japonica\njuniper_berry\nincense_cedar\nkawaka, Libocedrus_plumosa\npahautea, Libocedrus_bidwillii, mountain_pine\nmetasequoia, dawn_redwood, Metasequoia_glyptostrodoides\narborvitae\nwestern_red_cedar, red_cedar, canoe_cedar, Thuja_plicata\nAmerican_arborvitae, northern_white_cedar, white_cedar, Thuja_occidentalis\nOriental_arborvitae, Thuja_orientalis, Platycladus_orientalis\nhiba_arborvitae, 
Thujopsis_dolobrata\nketeleeria\nWollemi_pine\naraucaria\nmonkey_puzzle, chile_pine, Araucaria_araucana\nnorfolk_island_pine, Araucaria_heterophylla, Araucaria_excelsa\nnew_caledonian_pine, Araucaria_columnaris\nbunya_bunya, bunya_bunya_tree, Araucaria_bidwillii\nhoop_pine, Moreton_Bay_pine, Araucaria_cunninghamii\nkauri_pine, dammar_pine\nkauri, kaury, Agathis_australis\namboina_pine, amboyna_pine, Agathis_dammara, Agathis_alba\ndundathu_pine, queensland_kauri, smooth_bark_kauri, Agathis_robusta\nred_kauri, Agathis_lanceolata\nplum-yew\nCalifornia_nutmeg, nutmeg-yew, Torreya_californica\nstinking_cedar, stinking_yew, Torrey_tree, Torreya_taxifolia\ncelery_pine\ncelery_top_pine, celery-topped_pine, Phyllocladus_asplenifolius\ntanekaha, Phyllocladus_trichomanoides\nAlpine_celery_pine, Phyllocladus_alpinus\nyellowwood, yellowwood_tree\ngymnospermous_yellowwood\npodocarp\nyacca, yacca_podocarp, Podocarpus_coriaceus\nbrown_pine, Rockingham_podocarp, Podocarpus_elatus\ncape_yellowwood, African_yellowwood, Podocarpus_elongatus\nSouth-African_yellowwood, Podocarpus_latifolius\nalpine_totara, Podocarpus_nivalis\ntotara, Podocarpus_totara\ncommon_yellowwood, bastard_yellowwood, Afrocarpus_falcata\nkahikatea, New_Zealand_Dacryberry, New_Zealand_white_pine, Dacrycarpus_dacrydioides, Podocarpus_dacrydioides\nrimu, imou_pine, red_pine, Dacrydium_cupressinum\ntarwood, tar-wood, Dacrydium_colensoi\ncommon_sickle_pine, Falcatifolium_falciforme\nyellow-leaf_sickle_pine, Falcatifolium_taxoides\ntarwood, tar-wood, New_Zealand_mountain_pine, Halocarpus_bidwilli, Dacrydium_bidwilli\nwestland_pine, silver_pine, Lagarostrobus_colensoi\nhuon_pine, Lagarostrobus_franklinii, Dacrydium_franklinii\nChilean_rimu, Lepidothamnus_fonkii\nmountain_rimu, Lepidothamnus_laxifolius, Dacridium_laxifolius\nnagi, Nageia_nagi\nmiro, black_pine, Prumnopitys_ferruginea, Podocarpus_ferruginea\nmatai, black_pine, Prumnopitys_taxifolia, Podocarpus_spicata\nplum-fruited_yew, Prumnopitys_andina, 
Prumnopitys_elegans\nPrince_Albert_yew, Prince_Albert's_yew, Saxe-gothea_conspicua\nSundacarpus_amara, Prumnopitys_amara, Podocarpus_amara\nJapanese_umbrella_pine, Sciadopitys_verticillata\nyew\nOld_World_yew, English_yew, Taxus_baccata\nPacific_yew, California_yew, western_yew, Taxus_brevifolia\nJapanese_yew, Taxus_cuspidata\nFlorida_yew, Taxus_floridana\nNew_Caledonian_yew, Austrotaxus_spicata\nwhite-berry_yew, Pseudotaxus_chienii\nginkgo, gingko, maidenhair_tree, Ginkgo_biloba\nangiosperm, flowering_plant\ndicot, dicotyledon, magnoliopsid, exogen\nmonocot, monocotyledon, liliopsid, endogen\nfloret, floweret\nflower\nbloomer\nwildflower, wild_flower\napetalous_flower\ninflorescence\nrosebud\ngynostegium\npollinium\npistil\ngynobase\ngynophore\nstylopodium\ncarpophore\ncornstalk, corn_stalk\npetiolule\nmericarp\nmicropyle\ngerm_tube\npollen_tube\ngemma\ngalbulus\nnectary, honey_gland\npericarp, seed_vessel\nepicarp, exocarp\nmesocarp\npip\nsilique, siliqua\ncataphyll\nperisperm\nmonocarp, monocarpic_plant, monocarpous_plant\nsporophyte\ngametophyte\nmegasporangium, macrosporangium\nmicrospore\nmicrosporangium\nmicrosporophyll\narchespore, archesporium\nbonduc_nut, nicker_nut, nicker_seed\nJob's_tears\noilseed, oil-rich_seed\ncastor_bean\ncottonseed\ncandlenut\npeach_pit\nhypanthium, floral_cup, calyx_tube\npetal, flower_petal\ncorolla\nlip\nperianth, chlamys, floral_envelope, perigone, perigonium\nthistledown\ncustard_apple, custard_apple_tree\ncherimoya, cherimoya_tree, Annona_cherimola\nilama, ilama_tree, Annona_diversifolia\nsoursop, prickly_custard_apple, soursop_tree, Annona_muricata\nbullock's_heart, bullock's_heart_tree, bullock_heart, Annona_reticulata\nsweetsop, sweetsop_tree, Annona_squamosa\npond_apple, pond-apple_tree, Annona_glabra\npawpaw, papaw, papaw_tree, Asimina_triloba\nilang-ilang, ylang-ylang, Cananga_odorata\nlancewood, lancewood_tree, Oxandra_lanceolata\nGuinea_pepper, negro_pepper, Xylopia_aethiopica\nbarberry\nAmerican_barberry, 
Berberis_canadensis\ncommon_barberry, European_barberry, Berberis_vulgaris\nJapanese_barberry, Berberis_thunbergii\nOregon_grape, Oregon_holly_grape, hollygrape, mountain_grape, holly-leaves_barberry, Mahonia_aquifolium\nOregon_grape, Mahonia_nervosa\nmayapple, May_apple, wild_mandrake, Podophyllum_peltatum\nMay_apple\nallspice\nCarolina_allspice, strawberry_shrub, strawberry_bush, sweet_shrub, Calycanthus_floridus\nspicebush, California_allspice, Calycanthus_occidentalis\nkatsura_tree, Cercidiphyllum_japonicum\nlaurel\ntrue_laurel, bay, bay_laurel, bay_tree, Laurus_nobilis\ncamphor_tree, Cinnamomum_camphora\ncinnamon, Ceylon_cinnamon, Ceylon_cinnamon_tree, Cinnamomum_zeylanicum\ncassia, cassia-bark_tree, Cinnamomum_cassia\ncassia_bark, Chinese_cinnamon\nSaigon_cinnamon, Cinnamomum_loureirii\ncinnamon_bark\nspicebush, spice_bush, American_spicebush, Benjamin_bush, Lindera_benzoin, Benzoin_odoriferum\navocado, avocado_tree, Persea_Americana\nlaurel-tree, red_bay, Persea_borbonia\nsassafras, sassafras_tree, Sassafras_albidum\nCalifornia_laurel, California_bay_tree, Oregon_myrtle, pepperwood, spice_tree, sassafras_laurel, California_olive, mountain_laurel, Umbellularia_californica\nanise_tree\npurple_anise, Illicium_floridanum\nstar_anise, Illicium_anisatum\nstar_anise, Chinese_anise, Illicium_verum\nmagnolia\nsouthern_magnolia, evergreen_magnolia, large-flowering_magnolia, bull_bay, Magnolia_grandiflora\numbrella_tree, umbrella_magnolia, elkwood, elk-wood, Magnolia_tripetala\nearleaved_umbrella_tree, Magnolia_fraseri\ncucumber_tree, Magnolia_acuminata\nlarge-leaved_magnolia, large-leaved_cucumber_tree, great-leaved_macrophylla, Magnolia_macrophylla\nsaucer_magnolia, Chinese_magnolia, Magnolia_soulangiana\nstar_magnolia, Magnolia_stellata\nsweet_bay, swamp_bay, swamp_laurel, Magnolia_virginiana\nmanglietia, genus_Manglietia\ntulip_tree, tulip_poplar, yellow_poplar, canary_whitewood, Liriodendron_tulipifera\nmoonseed\ncommon_moonseed, Canada_moonseed, yellow_parilla, 
Menispermum_canadense\nCarolina_moonseed, Cocculus_carolinus\nnutmeg, nutmeg_tree, Myristica_fragrans\nwater_nymph, fragrant_water_lily, pond_lily, Nymphaea_odorata\nEuropean_white_lily, Nymphaea_alba\nsouthern_spatterdock, Nuphar_sagittifolium\nlotus, Indian_lotus, sacred_lotus, Nelumbo_nucifera\nwater_chinquapin, American_lotus, yanquapin, Nelumbo_lutea\nwater-shield, fanwort, Cabomba_caroliniana\nwater-shield, Brasenia_schreberi, water-target\npeony, paeony\nbuttercup, butterflower, butter-flower, crowfoot, goldcup, kingcup\nmeadow_buttercup, tall_buttercup, tall_crowfoot, tall_field_buttercup, Ranunculus_acris\nwater_crowfoot, water_buttercup, Ranunculus_aquatilis\nlesser_celandine, pilewort, Ranunculus_ficaria\nlesser_spearwort, Ranunculus_flammula\ngreater_spearwort, Ranunculus_lingua\nwestern_buttercup, Ranunculus_occidentalis\ncreeping_buttercup, creeping_crowfoot, Ranunculus_repens\ncursed_crowfoot, celery-leaved_buttercup, Ranunculus_sceleratus\naconite\nmonkshood, helmetflower, helmet_flower, Aconitum_napellus\nwolfsbane, wolfbane, wolf's_bane, Aconitum_lycoctonum\nbaneberry, cohosh, herb_Christopher\nbaneberry\nred_baneberry, redberry, red-berry, snakeberry, Actaea_rubra\npheasant's-eye, Adonis_annua\nanemone, windflower\nAlpine_anemone, mountain_anemone, Anemone_tetonensis\nCanada_anemone, Anemone_Canadensis\nthimbleweed, Anemone_cylindrica\nwood_anemone, Anemone_nemorosa\nwood_anemone, snowdrop, Anemone_quinquefolia\nlongheaded_thimbleweed, Anemone_riparia\nsnowdrop_anemone, snowdrop_windflower, Anemone_sylvestris\nVirginia_thimbleweed, Anemone_virginiana\nrue_anemone, Anemonella_thalictroides\ncolumbine, aquilegia, aquilege\nmeeting_house, honeysuckle, Aquilegia_canadensis\nblue_columbine, Aquilegia_caerulea, Aquilegia_scopulorum_calcarea\ngranny's_bonnets, Aquilegia_vulgaris\nmarsh_marigold, kingcup, meadow_bright, May_blob, cowslip, water_dragon, Caltha_palustris\nAmerican_bugbane, summer_cohosh, Cimicifuga_americana\nblack_cohosh, black_snakeroot, 
rattle-top, Cimicifuga_racemosa\nfetid_bugbane, foetid_bugbane, Cimicifuga_foetida\nclematis\npine_hyacinth, Clematis_baldwinii, Viorna_baldwinii\nblue_jasmine, blue_jessamine, curly_clematis, marsh_clematis, Clematis_crispa\ngolden_clematis, Clematis_tangutica\nscarlet_clematis, Clematis_texensis\nleather_flower, Clematis_versicolor\nleather_flower, vase-fine, vase_vine, Clematis_viorna\nvirgin's_bower, old_man's_beard, devil's_darning_needle, Clematis_virginiana\npurple_clematis, purple_virgin's_bower, mountain_clematis, Clematis_verticillaris\ngoldthread, golden_thread, Coptis_groenlandica, Coptis_trifolia_groenlandica\nrocket_larkspur, Consolida_ambigua, Delphinium_ajacis\ndelphinium\nlarkspur\nwinter_aconite, Eranthis_hyemalis\nlenten_rose, black_hellebore, Helleborus_orientalis\ngreen_hellebore, Helleborus_viridis\nhepatica, liverleaf\ngoldenseal, golden_seal, yellow_root, turmeric_root, Hydrastis_Canadensis\nfalse_rue_anemone, false_rue, Isopyrum_biternatum\ngiant_buttercup, Laccopetalum_giganteum\nnigella\nlove-in-a-mist, Nigella_damascena\nfennel_flower, Nigella_hispanica\nblack_caraway, nutmeg_flower, Roman_coriander, Nigella_sativa\npasqueflower, pasque_flower\nmeadow_rue\nfalse_bugbane, Trautvetteria_carolinensis\nglobeflower, globe_flower\nwinter's_bark, winter's_bark_tree, Drimys_winteri\npepper_shrub, Pseudowintera_colorata, Wintera_colorata\nsweet_gale, Scotch_gale, Myrica_gale\nwax_myrtle\nbay_myrtle, puckerbush, Myrica_cerifera\nbayberry, candleberry, swamp_candleberry, waxberry, Myrica_pensylvanica\nsweet_fern, Comptonia_peregrina, Comptonia_asplenifolia\ncorkwood, corkwood_tree, Leitneria_floridana\njointed_rush, Juncus_articulatus\ntoad_rush, Juncus_bufonius\nslender_rush, Juncus_tenuis\nzebrawood, zebrawood_tree\nConnarus_guianensis\nlegume, leguminous_plant\nlegume\npeanut\ngranadilla_tree, granadillo, Brya_ebenus\narariba, Centrolobium_robustum\ntonka_bean, coumara_nut\ncourbaril, Hymenaea_courbaril\nmelilotus, melilot, 
sweet_clover\ndarling_pea, poison_bush\nsmooth_darling_pea, Swainsona_galegifolia\nclover, trefoil\nalpine_clover, Trifolium_alpinum\nhop_clover, shamrock, lesser_yellow_trefoil, Trifolium_dubium\ncrimson_clover, Italian_clover, Trifolium_incarnatum\nred_clover, purple_clover, Trifolium_pratense\nbuffalo_clover, Trifolium_reflexum, Trifolium_stoloniferum\nwhite_clover, dutch_clover, shamrock, Trifolium_repens\nmimosa\nacacia\nshittah, shittah_tree\nwattle\nblack_wattle, Acacia_auriculiformis\ngidgee, stinking_wattle, Acacia_cambegei\ncatechu, Jerusalem_thorn, Acacia_catechu\nsilver_wattle, mimosa, Acacia_dealbata\nhuisache, cassie, mimosa_bush, sweet_wattle, sweet_acacia, scented_wattle, flame_tree, Acacia_farnesiana\nlightwood, Acacia_melanoxylon\ngolden_wattle, Acacia_pycnantha\nfever_tree, Acacia_xanthophloea\ncoralwood, coral-wood, red_sandalwood, Barbados_pride, peacock_flower_fence, Adenanthera_pavonina\nalbizzia, albizia\nsilk_tree, Albizia_julibrissin, Albizzia_julibrissin\nsiris, siris_tree, Albizia_lebbeck, Albizzia_lebbeck\nrain_tree, saman, monkeypod, monkey_pod, zaman, zamang, Albizia_saman\ncalliandra\nconacaste, elephant's_ear, Enterolobium_cyclocarpa\ninga\nice-cream_bean, Inga_edulis\nguama, Inga_laurina\nlead_tree, white_popinac, Leucaena_glauca, Leucaena_leucocephala\nwild_tamarind, Lysiloma_latisiliqua, Lysiloma_bahamensis\nsabicu, Lysiloma_sabicu\nnitta_tree\nParkia_javanica\nmanila_tamarind, camachile, huamachil, wild_tamarind, Pithecellobium_dulce\ncat's-claw, catclaw, black_bead, Pithecellodium_unguis-cati\nhoney_mesquite, Western_honey_mesquite, Prosopis_glandulosa\nalgarroba, algarrobilla, algarobilla\nscrew_bean, screwbean, tornillo, screwbean_mesquite, Prosopis_pubescens\nscrew_bean\ndogbane\nIndian_hemp, rheumatism_weed, Apocynum_cannabinum\nbushman's_poison, ordeal_tree, Acocanthera_oppositifolia, Acocanthera_venenata\nimpala_lily, mock_azalia, desert_rose, kudu_lily, Adenium_obesum, Adenium_multiflorum\nallamanda\ncommon_allamanda, 
golden_trumpet, Allamanda_cathartica\ndita, dita_bark, devil_tree, Alstonia_scholaris\nNepal_trumpet_flower, Easter_lily_vine, Beaumontia_grandiflora\ncarissa\nhedge_thorn, natal_plum, Carissa_bispinosa\nnatal_plum, amatungulu, Carissa_macrocarpa, Carissa_grandiflora\nperiwinkle, rose_periwinkle, Madagascar_periwinkle, old_maid, Cape_periwinkle, red_periwinkle, cayenne_jasmine, Catharanthus_roseus, Vinca_rosea\nivory_tree, conessi, kurchi, kurchee, Holarrhena_pubescens, Holarrhena_antidysenterica\nwhite_dipladenia, Mandevilla_boliviensis, Dipladenia_boliviensis\nChilean_jasmine, Mandevilla_laxa\noleander, rose_bay, Nerium_oleander\nfrangipani, frangipanni\nWest_Indian_jasmine, pagoda_tree, Plumeria_alba\nrauwolfia, rauvolfia\nsnakewood, Rauwolfia_serpentina\nStrophanthus_kombe\nyellow_oleander, Thevetia_peruviana, Thevetia_neriifolia\nmyrtle, Vinca_minor\nlarge_periwinkle, Vinca_major\narum, aroid\ncuckoopint, lords-and-ladies, jack-in-the-pulpit, Arum_maculatum\nblack_calla, Arum_palaestinum\ncalamus\nalocasia, elephant's_ear, elephant_ear\ngiant_taro, Alocasia_macrorrhiza\namorphophallus\npungapung, telingo_potato, elephant_yam, Amorphophallus_paeonifolius, Amorphophallus_campanulatus\ndevil's_tongue, snake_palm, umbrella_arum, Amorphophallus_rivieri\nanthurium, tailflower, tail-flower\nflamingo_flower, flamingo_plant, Anthurium_andraeanum, Anthurium_scherzerianum\njack-in-the-pulpit, Indian_turnip, wake-robin, Arisaema_triphyllum, Arisaema_atrorubens\nfriar's-cowl, Arisarum_vulgare\ncaladium\nCaladium_bicolor\nwild_calla, water_arum, Calla_palustris\ntaro, taro_plant, dalo, dasheen, Colocasia_esculenta\ntaro, cocoyam, dasheen, eddo\ncryptocoryne, water_trumpet\ndracontium\ngolden_pothos, pothos, ivy_arum, Epipremnum_aureum, Scindapsus_aureus\nskunk_cabbage, Lysichiton_americanum\nmonstera\nceriman, Monstera_deliciosa\nnephthytis\nNephthytis_afzelii\narrow_arum\ngreen_arrow_arum, tuckahoe, Peltandra_virginica\nphilodendron\npistia, water_lettuce, water_cabbage, 
Pistia_stratiotes, Pistia_stratoites\npothos\nspathiphyllum, peace_lily, spathe_flower\nskunk_cabbage, polecat_weed, foetid_pothos, Symplocarpus_foetidus\nyautia, tannia, spoonflower, malanga, Xanthosoma_sagittifolium, Xanthosoma_atrovirens\ncalla_lily, calla, arum_lily, Zantedeschia_aethiopica\npink_calla, Zantedeschia_rehmanii\ngolden_calla\nduckweed\ncommon_duckweed, lesser_duckweed, Lemna_minor\nstar-duckweed, Lemna_trisulca\ngreat_duckweed, water_flaxseed, Spirodela_polyrrhiza\nwatermeal\ncommon_wolffia, Wolffia_columbiana\naralia\nAmerican_angelica_tree, devil's_walking_stick, Hercules'-club, Aralia_spinosa\nAmerican_spikenard, petty_morel, life-of-man, Aralia_racemosa\nbristly_sarsaparilla, bristly_sarsparilla, dwarf_elder, Aralia_hispida\nJapanese_angelica_tree, Aralia_elata\nChinese_angelica, Chinese_angelica_tree, Aralia_stipulata\nivy, common_ivy, English_ivy, Hedera_helix\npuka, Meryta_sinclairii\nginseng, nin-sin, Panax_ginseng, Panax_schinseng, Panax_pseudoginseng\nginseng\numbrella_tree, Schefflera_actinophylla, Brassaia_actinophylla\nbirthwort, Aristolochia_clematitis\nDutchman's-pipe, pipe_vine, Aristolochia_macrophylla, Aristolochia_durior\nVirginia_snakeroot, Virginia_serpentaria, Virginia_serpentary, Aristolochia_serpentaria\nCanada_ginger, black_snakeroot, Asarum_canadense\nheartleaf, heart-leaf, Asarum_virginicum\nheartleaf, heart-leaf, Asarum_shuttleworthii\nasarabacca, Asarum_europaeum\ncaryophyllaceous_plant\ncorn_cockle, corn_campion, crown-of-the-field, Agrostemma_githago\nsandwort\nmountain_sandwort, mountain_starwort, mountain_daisy, Arenaria_groenlandica\npine-barren_sandwort, longroot, Arenaria_caroliniana\nseabeach_sandwort, Arenaria_peploides\nrock_sandwort, Arenaria_stricta\nthyme-leaved_sandwort, Arenaria_serpyllifolia\nmouse-ear_chickweed, mouse_eared_chickweed, mouse_ear, clammy_chickweed, chickweed\nsnow-in-summer, love-in-a-mist, Cerastium_tomentosum\nAlpine_mouse-ear, Arctic_mouse-ear, Cerastium_alpinum\npink, 
garden_pink\nsweet_William, Dianthus_barbatus\ncarnation, clove_pink, gillyflower, Dianthus_caryophyllus\nchina_pink, rainbow_pink, Dianthus_chinensis\nJapanese_pink, Dianthus_chinensis_heddewigii\nmaiden_pink, Dianthus_deltoides\ncheddar_pink, Diangus_gratianopolitanus\nbutton_pink, Dianthus_latifolius\ncottage_pink, grass_pink, Dianthus_plumarius\nfringed_pink, Dianthus_supurbus\ndrypis\nbaby's_breath, babies'-breath, Gypsophila_paniculata\ncoral_necklace, Illecebrum_verticullatum\nlychnis, catchfly\nragged_robin, cuckoo_flower, Lychnis_flos-cuculi, Lychins_floscuculi\nscarlet_lychnis, maltese_cross, Lychins_chalcedonica\nmullein_pink, rose_campion, gardener's_delight, dusty_miller, Lychnis_coronaria\nsandwort, Moehringia_lateriflora\nsandwort, Moehringia_mucosa\nsoapwort, hedge_pink, bouncing_Bet, bouncing_Bess, Saponaria_officinalis\nknawel, knawe, Scleranthus_annuus\nsilene, campion, catchfly\nmoss_campion, Silene_acaulis\nwild_pink, Silene_caroliniana\nred_campion, red_bird's_eye, Silene_dioica, Lychnis_dioica\nwhite_campion, evening_lychnis, white_cockle, bladder_campion, Silene_latifolia, Lychnis_alba\nfire_pink, Silene_virginica\nbladder_campion, Silene_uniflora, Silene_vulgaris\ncorn_spurry, corn_spurrey, Spergula_arvensis\nsand_spurry, sea_spurry, Spergularia_rubra\nchickweed\ncommon_chickweed, Stellaria_media\ncowherb, cow_cockle, Vaccaria_hispanica, Vaccaria_pyramidata, Saponaria_vaccaria\nHottentot_fig, Hottentot's_fig, sour_fig, Carpobrotus_edulis, Mesembryanthemum_edule\nlivingstone_daisy, Dorotheanthus_bellidiformis\nfig_marigold, pebble_plant\nice_plant, icicle_plant, Mesembryanthemum_crystallinum\nNew_Zealand_spinach, Tetragonia_tetragonioides, Tetragonia_expansa\namaranth\namaranth\ntumbleweed, Amaranthus_albus, Amaranthus_graecizans\nprince's-feather, gentleman's-cane, prince's-plume, red_amaranth, purple_amaranth, Amaranthus_cruentus, Amaranthus_hybridus_hypochondriacus, Amaranthus_hybridus_erythrostachys\npigweed, 
Amaranthus_hypochondriacus\nthorny_amaranth, Amaranthus_spinosus\nalligator_weed, alligator_grass, Alternanthera_philoxeroides\ncockscomb, common_cockscomb, Celosia_cristata, Celosia_argentea_cristata\ncottonweed\nglobe_amaranth, bachelor's_button, Gomphrena_globosa\nbloodleaf\nsaltwort, Batis_maritima\nlamb's-quarters, pigweed, wild_spinach, Chenopodium_album\ngood-king-henry, allgood, fat_hen, wild_spinach, Chenopodium_bonus-henricus\nJerusalem_oak, feather_geranium, Mexican_tea, Chenopodium_botrys, Atriplex_mexicana\noak-leaved_goosefoot, oakleaf_goosefoot, Chenopodium_glaucum\nsowbane, red_goosefoot, Chenopodium_hybridum\nnettle-leaved_goosefoot, nettleleaf_goosefoot, Chenopodium_murale\nred_goosefoot, French_spinach, Chenopodium_rubrum\nstinking_goosefoot, Chenopodium_vulvaria\norach, orache\nsaltbush\ngarden_orache, mountain_spinach, Atriplex_hortensis\ndesert_holly, Atriplex_hymenelytra\nquail_bush, quail_brush, white_thistle, Atriplex_lentiformis\nbeet, common_beet, Beta_vulgaris\nbeetroot, Beta_vulgaris_rubra\nchard, Swiss_chard, spinach_beet, leaf_beet, chard_plant, Beta_vulgaris_cicla\nmangel-wurzel, mangold-wurzel, mangold, Beta_vulgaris_vulgaris\nwinged_pigweed, tumbleweed, Cycloloma_atriplicifolium\nhalogeton, Halogeton_glomeratus\nglasswort, samphire, Salicornia_europaea\nsaltwort, barilla, glasswort, kali, kelpwort, Salsola_kali, Salsola_soda\nRussian_thistle, Russian_tumbleweed, Russian_cactus, tumbleweed, Salsola_kali_tenuifolia\ngreasewood, black_greasewood, Sarcobatus_vermiculatus\nscarlet_musk_flower, Nyctaginia_capitata\nsand_verbena\nsweet_sand_verbena, Abronia_fragrans\nyellow_sand_verbena, Abronia_latifolia\nbeach_pancake, Abronia_maritima\nbeach_sand_verbena, pink_sand_verbena, Abronia_umbellata\ndesert_sand_verbena, Abronia_villosa\ntrailing_four_o'clock, trailing_windmills, Allionia_incarnata\nbougainvillea\numbrellawort\nfour_o'clock\ncommon_four-o'clock, marvel-of-Peru, Mirabilis_jalapa, Mirabilis_uniflora\nCalifornia_four_o'clock, 
Mirabilis_laevis, Mirabilis_californica\nsweet_four_o'clock, maravilla, Mirabilis_longiflora\ndesert_four_o'clock, Colorado_four_o'clock, maravilla, Mirabilis_multiflora\nmountain_four_o'clock, Mirabilis_oblongifolia\ncockspur, Pisonia_aculeata\nrattail_cactus, rat's-tail_cactus, Aporocactus_flagelliformis\nsaguaro, sahuaro, Carnegiea_gigantea\nnight-blooming_cereus\nechinocactus, barrel_cactus\nhedgehog_cactus\ngolden_barrel_cactus, Echinocactus_grusonii\nhedgehog_cereus\nrainbow_cactus\nepiphyllum, orchid_cactus\nbarrel_cactus\nnight-blooming_cereus\nchichipe, Lemaireocereus_chichipe\nmescal, mezcal, peyote, Lophophora_williamsii\nmescal_button, sacred_mushroom, magic_mushroom\nmammillaria\nfeather_ball, Mammillaria_plumosa\ngarambulla, garambulla_cactus, Myrtillocactus_geometrizans\nKnowlton's_cactus, Pediocactus_knowltonii\nnopal\nprickly_pear, prickly_pear_cactus\ncholla, Opuntia_cholla\nnopal, Opuntia_lindheimeri\ntuna, Opuntia_tuna\nBarbados_gooseberry, Barbados-gooseberry_vine, Pereskia_aculeata\nmistletoe_cactus\nChristmas_cactus, Schlumbergera_buckleyi, Schlumbergera_baridgesii\nnight-blooming_cereus\ncrab_cactus, Thanksgiving_cactus, Zygocactus_truncatus, Schlumbergera_truncatus\npokeweed\nIndian_poke, Phytolacca_acinosa\npoke, pigeon_berry, garget, scoke, Phytolacca_americana\nombu, bella_sombra, Phytolacca_dioica\nbloodberry, blood_berry, rougeberry, rouge_plant, Rivina_humilis\nportulaca\nrose_moss, sun_plant, Portulaca_grandiflora\ncommon_purslane, pussley, pussly, verdolagas, Portulaca_oleracea\nrock_purslane\nred_maids, redmaids, Calandrinia_ciliata\nCarolina_spring_beauty, Claytonia_caroliniana\nspring_beauty, Clatonia_lanceolata\nVirginia_spring_beauty, Claytonia_virginica\nsiskiyou_lewisia, Lewisia_cotyledon\nbitterroot, Lewisia_rediviva\nbroad-leaved_montia, Montia_cordifolia\nblinks, blinking_chickweed, water_chickweed, Montia_lamprosperma\ntoad_lily, Montia_chamissoi\nwinter_purslane, miner's_lettuce, Cuban_spinach, 
Montia_perfoliata\nflame_flower, flame-flower, flameflower, Talinum_aurantiacum\npigmy_talinum, Talinum_brevifolium\njewels-of-opar, Talinum_paniculatum\ncaper\nnative_pomegranate, Capparis_arborea\ncaper_tree, Jamaica_caper_tree, Capparis_cynophallophora\ncaper_tree, bay-leaved_caper, Capparis_flexuosa\ncommon_caper, Capparis_spinosa\nspiderflower, cleome\nRocky_Mountain_bee_plant, stinking_clover, Cleome_serrulata\nclammyweed, Polanisia_graveolens, Polanisia_dodecandra\ncrucifer, cruciferous_plant\ncress, cress_plant\nwatercress\nstonecress, stone_cress\ngarlic_mustard, hedge_garlic, sauce-alone, jack-by-the-hedge, Alliaria_officinalis\nalyssum, madwort\nrose_of_Jericho, resurrection_plant, Anastatica_hierochuntica\nArabidopsis_thaliana, mouse-ear_cress\nArabidopsis_lyrata\nrock_cress, rockcress\nsicklepod, Arabis_Canadensis\ntower_mustard, tower_cress, Turritis_glabra, Arabis_glabra\nhorseradish, horseradish_root\nwinter_cress, St._Barbara's_herb, scurvy_grass\nyellow_rocket, rockcress, rocket_cress, Barbarea_vulgaris, Sisymbrium_barbarea\nhoary_alison, hoary_alyssum, Berteroa_incana\nbuckler_mustard, Biscutalla_laevigata\nwild_cabbage, Brassica_oleracea\ncabbage, cultivated_cabbage, Brassica_oleracea\nhead_cabbage, head_cabbage_plant, Brassica_oleracea_capitata\nsavoy_cabbage\nbrussels_sprout, Brassica_oleracea_gemmifera\ncauliflower, Brassica_oleracea_botrytis\nbroccoli, Brassica_oleracea_italica\ncollard\nkohlrabi, Brassica_oleracea_gongylodes\nturnip_plant\nturnip, white_turnip, Brassica_rapa\nrutabaga, turnip_cabbage, swede, Swedish_turnip, rutabaga_plant, Brassica_napus_napobrassica\nbroccoli_raab, broccoli_rabe, Brassica_rapa_ruvo\nmustard\nchinese_mustard, indian_mustard, leaf_mustard, gai_choi, Brassica_juncea\nbok_choy, bok_choi, pakchoi, pak_choi, Chinese_white_cabbage, Brassica_rapa_chinensis\nrape, colza, Brassica_napus\nrapeseed\nshepherd's_purse, shepherd's_pouch, Capsella_bursa-pastoris\nlady's_smock, cuckooflower, cuckoo_flower, meadow_cress, 
Cardamine_pratensis\ncoral-root_bittercress, coralroot, coralwort, Cardamine_bulbifera, Dentaria_bulbifera\ncrinkleroot, crinkle-root, crinkle_root, pepper_root, toothwort, Cardamine_diphylla, Dentaria_diphylla\nAmerican_watercress, mountain_watercress, Cardamine_rotundifolia\nspring_cress, Cardamine_bulbosa\npurple_cress, Cardamine_douglasii\nwallflower, Cheiranthus_cheiri, Erysimum_cheiri\nprairie_rocket\nscurvy_grass, common_scurvy_grass, Cochlearia_officinalis\nsea_kale, sea_cole, Crambe_maritima\ntansy_mustard, Descurainia_pinnata\ndraba\nwallflower\nprairie_rocket\nSiberian_wall_flower, Erysimum_allionii, Cheiranthus_allionii\nwestern_wall_flower, Erysimum_asperum, Cheiranthus_asperus, Erysimum_arkansanum\nwormseed_mustard, Erysimum_cheiranthoides\nheliophila\ndamask_violet, Dame's_violet, sweet_rocket, Hesperis_matronalis\ntansy-leaved_rocket, Hugueninia_tanacetifolia, Sisymbrium_tanacetifolia\ncandytuft\nwoad\ndyer's_woad, Isatis_tinctoria\nbladderpod\nsweet_alyssum, sweet_alison, Lobularia_maritima\nMalcolm_stock, stock\nVirginian_stock, Virginia_stock, Malcolmia_maritima\nstock, gillyflower\nbrompton_stock, Matthiola_incana\nbladderpod\nchamois_cress, Pritzelago_alpina, Lepidium_alpina\nradish_plant, radish\njointed_charlock, wild_radish, wild_rape, runch, Raphanus_raphanistrum\nradish, Raphanus_sativus\nradish, daikon, Japanese_radish, Raphanus_sativus_longipinnatus\nmarsh_cress, yellow_watercress, Rorippa_islandica\ngreat_yellowcress, Rorippa_amphibia, Nasturtium_amphibium\nschizopetalon, Schizopetalon_walkeri\nfield_mustard, wild_mustard, charlock, chadlock, Brassica_kaber, Sinapis_arvensis\nhedge_mustard, Sisymbrium_officinale\ndesert_plume, prince's-plume, Stanleya_pinnata, Cleome_pinnata\npennycress\nfield_pennycress, French_weed, fanweed, penny_grass, stinkweed, mithridate_mustard, Thlaspi_arvense\nfringepod, lacepod\nbladderpod\nwasabi\npoppy\nIceland_poppy, Papaver_alpinum\nwestern_poppy, Papaver_californicum\nprickly_poppy, 
Papaver_argemone\nIceland_poppy, arctic_poppy, Papaver_nudicaule\noriental_poppy, Papaver_orientale\ncorn_poppy, field_poppy, Flanders_poppy, Papaver_rhoeas\nopium_poppy, Papaver_somniferum\nprickly_poppy, argemone, white_thistle, devil's_fig\nMexican_poppy, Argemone_mexicana\nbocconia, tree_celandine, Bocconia_frutescens\ncelandine, greater_celandine, swallowwort, swallow_wort, Chelidonium_majus\ncorydalis\nclimbing_corydalis, Corydalis_claviculata, Fumaria_claviculata\nCalifornia_poppy, Eschscholtzia_californica\nhorn_poppy, horned_poppy, yellow_horned_poppy, sea_poppy, Glaucium_flavum\ngolden_cup, Mexican_tulip_poppy, Hunnemania_fumariifolia\nplume_poppy, bocconia, Macleaya_cordata\nblue_poppy, Meconopsis_betonicifolia\nWelsh_poppy, Meconopsis_cambrica\ncreamcups, Platystemon_californicus\nmatilija_poppy, California_tree_poppy, Romneya_coulteri\nwind_poppy, flaming_poppy, Stylomecon_heterophyllum, Papaver_heterophyllum\ncelandine_poppy, wood_poppy, Stylophorum_diphyllum\nclimbing_fumitory, Allegheny_vine, Adlumia_fungosa, Fumaria_fungosa\nbleeding_heart, lyreflower, lyre-flower, Dicentra_spectabilis\nDutchman's_breeches, Dicentra_cucullaria\nsquirrel_corn, Dicentra_canadensis\ncomposite, composite_plant\ncompass_plant, compass_flower\neverlasting, everlasting_flower\nachillea\nyarrow, milfoil, Achillea_millefolium\npink-and-white_everlasting, pink_paper_daisy, Acroclinium_roseum\nwhite_snakeroot, white_sanicle, Ageratina_altissima, Eupatorium_rugosum\nageratum\ncommon_ageratum, Ageratum_houstonianum\nsweet_sultan, Amberboa_moschata, Centaurea_moschata\nragweed, ambrosia, bitterweed\ncommon_ragweed, Ambrosia_artemisiifolia\ngreat_ragweed, Ambrosia_trifida\nwestern_ragweed, perennial_ragweed, Ambrosia_psilostachya\nammobium\nwinged_everlasting, Ammobium_alatum\npellitory, pellitory-of-Spain, Anacyclus_pyrethrum\npearly_everlasting, cottonweed, 
Anaphalis_margaritacea\nandryala\nplantain-leaved_pussytoes\nfield_pussytoes\nsolitary_pussytoes\nmountain_everlasting\nmayweed, dog_fennel, stinking_mayweed, stinking_chamomile, Anthemis_cotula\nyellow_chamomile, golden_marguerite, dyers'_chamomile, Anthemis_tinctoria\ncorn_chamomile, field_chamomile, corn_mayweed, Anthemis_arvensis\nwoolly_daisy, dwarf_daisy, Antheropeas_wallacei, Eriophyllum_wallacei\nburdock, clotbur\ngreat_burdock, greater_burdock, cocklebur, Arctium_lappa\nAfrican_daisy\nblue-eyed_African_daisy, Arctotis_stoechadifolia, Arctotis_venusta\nmarguerite, marguerite_daisy, Paris_daisy, Chrysanthemum_frutescens, Argyranthemum_frutescens\nsilversword, Argyroxiphium_sandwicense\narnica\nheartleaf_arnica, Arnica_cordifolia\nArnica_montana\nlamb_succory, dwarf_nipplewort, Arnoseris_minima\nartemisia\nmugwort\nsweet_wormwood, Artemisia_annua\nfield_wormwood, Artemisia_campestris\ntarragon, estragon, Artemisia_dracunculus\nsand_sage, silvery_wormwood, Artemisia_filifolia\nwormwood_sage, prairie_sagewort, Artemisia_frigida\nwestern_mugwort, white_sage, cudweed, prairie_sage, Artemisia_ludoviciana, Artemisia_gnaphalodes\nRoman_wormwood, Artemis_pontica\nbud_brush, bud_sagebrush, Artemis_spinescens\ncommon_mugwort, Artemisia_vulgaris\naster\nwood_aster\nwhorled_aster, Aster_acuminatus\nheath_aster, Aster_arenosus\nheart-leaved_aster, Aster_cordifolius\nwhite_wood_aster, Aster_divaricatus\nbushy_aster, Aster_dumosus\nheath_aster, Aster_ericoides\nwhite_prairie_aster, Aster_falcatus\nstiff_aster, Aster_linarifolius\ngoldilocks, goldilocks_aster, Aster_linosyris, Linosyris_vulgaris\nlarge-leaved_aster, Aster_macrophyllus\nNew_England_aster, Aster_novae-angliae\nMichaelmas_daisy, New_York_aster, Aster_novi-belgii\nupland_white_aster, Aster_ptarmicoides\nShort's_aster, Aster_shortii\nsea_aster, sea_starwort, Aster_tripolium\nprairie_aster, 
Aster_turbinellis\nannual_salt-marsh_aster\naromatic_aster\narrow_leaved_aster\nazure_aster\nbog_aster\ncrooked-stemmed_aster\nEastern_silvery_aster\nflat-topped_white_aster\nlate_purple_aster\npanicled_aster\nperennial_salt_marsh_aster\npurple-stemmed_aster\nrough-leaved_aster\nrush_aster\nSchreiber's_aster\nsmall_white_aster\nsmooth_aster\nsouthern_aster\nstarved_aster, calico_aster\ntradescant's_aster\nwavy-leaved_aster\nWestern_silvery_aster\nwillow_aster\nayapana, Ayapana_triplinervis, Eupatorium_aya-pana\nmule_fat, Baccharis_viminea\nbalsamroot\ndaisy\ncommon_daisy, English_daisy, Bellis_perennis\nbur_marigold, burr_marigold, beggar-ticks, beggar's-ticks, sticktight\nSpanish_needles, Bidens_bipinnata\ntickseed_sunflower, Bidens_coronata, Bidens_trichosperma\nEuropean_beggar-ticks, trifid_beggar-ticks, trifid_bur_marigold, Bidens_tripartita\nslender_knapweed\nfalse_chamomile\nSwan_River_daisy, Brachycome_Iberidifolia\nwoodland_oxeye, Buphthalmum_salicifolium\nIndian_plantain\ncalendula\ncommon_marigold, pot_marigold, ruddles, Scotch_marigold, Calendula_officinalis\nChina_aster, Callistephus_chinensis\nthistle\nwelted_thistle, Carduus_crispus\nmusk_thistle, nodding_thistle, Carduus_nutans\ncarline_thistle\nstemless_carline_thistle, Carlina_acaulis\ncommon_carline_thistle, Carlina_vulgaris\nsafflower, false_saffron, Carthamus_tinctorius\nsafflower_seed\ncatananche\nblue_succory, cupid's_dart, Catananche_caerulea\ncentaury\ndusty_miller, Centaurea_cineraria, Centaurea_gymnocarpa\ncornflower, bachelor's_button, bluebottle, Centaurea_cyanus\nstar-thistle, caltrop, Centauria_calcitrapa\nknapweed\nsweet_sultan, Centaurea_imperialis\ngreat_knapweed, greater_knapweed, Centaurea_scabiosa\nBarnaby's_thistle, yellow_star-thistle, Centaurea_solstitialis\nchamomile, camomile, Chamaemelum_nobilis, Anthemis_nobilis\nchaenactis\nchrysanthemum\ncorn_marigold, field_marigold, Chrysanthemum_segetum\ncrown_daisy, Chrysanthemum_coronarium\nchop-suey_greens, tong_ho, shun_giku, 
Chrysanthemum_coronarium_spatiosum\ngolden_aster\nMaryland_golden_aster, Chrysopsis_mariana\ngoldenbush\nrabbit_brush, rabbit_bush, Chrysothamnus_nauseosus\nchicory, succory, chicory_plant, Cichorium_intybus\nendive, witloof, Cichorium_endivia\nchicory, chicory_root\nplume_thistle, plumed_thistle\nCanada_thistle, creeping_thistle, Cirsium_arvense\nfield_thistle, Cirsium_discolor\nwoolly_thistle, Cirsium_flodmanii\nEuropean_woolly_thistle, Cirsium_eriophorum\nmelancholy_thistle, Cirsium_heterophylum, Cirsium_helenioides\nbrook_thistle, Cirsium_rivulare\nbull_thistle, boar_thistle, spear_thistle, Cirsium_vulgare, Cirsium_lanceolatum\nblessed_thistle, sweet_sultan, Cnicus_benedictus\nmistflower, mist-flower, ageratum, Conoclinium_coelestinum, Eupatorium_coelestinum\nhorseweed, Canadian_fleabane, fleabane, Conyza_canadensis, Erigeron_canadensis\ncoreopsis, tickseed, tickweed, tick-weed\ngiant_coreopsis, Coreopsis_gigantea\nsea_dahlia, Coreopsis_maritima\ncalliopsis, Coreopsis_tinctoria\ncosmos, cosmea\nbrass_buttons, Cotula_coronopifolia\nbilly_buttons\nhawk's-beard, hawk's-beards\nartichoke, globe_artichoke, artichoke_plant, Cynara_scolymus\ncardoon, Cynara_cardunculus\ndahlia, Dahlia_pinnata\nGerman_ivy, Delairea_odorata, Senecio_milkanioides\nflorist's_chrysanthemum, florists'_chrysanthemum, mum, Dendranthema_grandifloruom, Chrysanthemum_morifolium\ncape_marigold, sun_marigold, star_of_the_veldt\nleopard's-bane, leopardbane\nconeflower\nglobe_thistle\nelephant's-foot\ntassel_flower, Emilia_sagitta\nbrittlebush, brittle_bush, incienso, Encelia_farinosa\nsunray, Enceliopsis_nudicaulis\nengelmannia\nfireweed, Erechtites_hieracifolia\nfleabane\nblue_fleabane, Erigeron_acer\ndaisy_fleabane, Erigeron_annuus\norange_daisy, orange_fleabane, Erigeron_aurantiacus\nspreading_fleabane, Erigeron_divergens\nseaside_daisy, beach_aster, Erigeron_glaucous\nPhiladelphia_fleabane, Erigeron_philadelphicus\nrobin's_plantain, Erigeron_pulchellus\nshowy_daisy, 
Erigeron_speciosus\nwoolly_sunflower\ngolden_yarrow, Eriophyllum_lanatum\ndog_fennel, Eupatorium_capillifolium\nJoe-Pye_weed, spotted_Joe-Pye_weed, Eupatorium_maculatum\nboneset, agueweed, thoroughwort, Eupatorium_perfoliatum\nJoe-Pye_weed, purple_boneset, trumpet_weed, marsh_milkweed, Eupatorium_purpureum\nblue_daisy, blue_marguerite, Felicia_amelloides\nkingfisher_daisy, Felicia_bergeriana\ncotton_rose, cudweed, filago\nherba_impia, Filago_germanica\ngaillardia\ngazania\ntreasure_flower, Gazania_rigens\nAfrican_daisy\nBarberton_daisy, Transvaal_daisy, Gerbera_jamesonii\ndesert_sunflower, Gerea_canescens\ncudweed\nchafeweed, wood_cudweed, Gnaphalium_sylvaticum\ngumweed, gum_plant, tarweed, rosinweed\nGrindelia_robusta\ncurlycup_gumweed, Grindelia_squarrosa\nlittle-head_snakeweed, Gutierrezia_microcephala\nrabbitweed, rabbit-weed, snakeweed, broom_snakeweed, broom_snakeroot, turpentine_weed, Gutierrezia_sarothrae\nbroomweed, broom-weed, Gutierrezia_texana\nvelvet_plant, purple_velvet_plant, royal_velvet_plant, Gynura_aurantiaca\ngoldenbush\ncamphor_daisy, Haplopappus_phyllocephalus\nyellow_spiny_daisy, Haplopappus_spinulosus\nhoary_golden_bush, Hazardia_cana\nsneezeweed\norange_sneezeweed, owlclaws, Helenium_hoopesii\nrosilla, Helenium_puberulum\nsunflower, helianthus\nswamp_sunflower, Helianthus_angustifolius\ncommon_sunflower, mirasol, Helianthus_annuus\ngiant_sunflower, tall_sunflower, Indian_potato, Helianthus_giganteus\nshowy_sunflower, Helianthus_laetiflorus\nMaximilian's_sunflower, Helianthus_maximilianii\nprairie_sunflower, Helianthus_petiolaris\nJerusalem_artichoke, girasol, Jerusalem_artichoke_sunflower, Helianthus_tuberosus\nJerusalem_artichoke\nstrawflower, golden_everlasting, yellow_paper_daisy, Helichrysum_bracteatum\nheliopsis, oxeye\nstrawflower\nhairy_golden_aster, prairie_golden_aster, Heterotheca_villosa, Chrysopsis_villosa\nhawkweed\nrattlesnake_weed, Hieracium_venosum\nalpine_coltsfoot, Homogyne_alpina, Tussilago_alpina\nalpine_gold, 
alpine_hulsea, Hulsea_algida\ndwarf_hulsea, Hulsea_nana\ncat's-ear, California_dandelion, capeweed, gosmore, Hypochaeris_radicata\ninula\nmarsh_elder, iva\nburweed_marsh_elder, false_ragweed, Iva_xanthifolia\nkrigia\ndwarf_dandelion, Krigia_dandelion, Krigia_bulbosa\ngarden_lettuce, common_lettuce, Lactuca_sativa\ncos_lettuce, romaine_lettuce, Lactuca_sativa_longifolia\nleaf_lettuce, Lactuca_sativa_crispa\nceltuce, stem_lettuce, Lactuca_sativa_asparagina\nprickly_lettuce, horse_thistle, Lactuca_serriola, Lactuca_scariola\ngoldfields, Lasthenia_chrysostoma\ntidytips, tidy_tips, Layia_platyglossa\nhawkbit\nfall_dandelion, arnica_bud, Leontodon_autumnalis\nedelweiss, Leontopodium_alpinum\noxeye_daisy, ox-eyed_daisy, marguerite, moon_daisy, white_daisy, Leucanthemum_vulgare, Chrysanthemum_leucanthemum\noxeye_daisy, Leucanthemum_maximum, Chrysanthemum_maximum\nshasta_daisy, Leucanthemum_superbum, Chrysanthemum_maximum_maximum\nPyrenees_daisy, Leucanthemum_lacustre, Chrysanthemum_lacustre\nnorth_island_edelweiss, Leucogenes_leontopodium\nblazing_star, button_snakeroot, gayfeather, gay-feather, snakeroot\ndotted_gayfeather, Liatris_punctata\ndense_blazing_star, Liatris_pycnostachya\nTexas_star, Lindheimera_texana\nAfrican_daisy, yellow_ageratum, Lonas_inodora, Lonas_annua\ntahoka_daisy, tansy_leaf_aster, Machaeranthera_tanacetifolia\nsticky_aster, Machaeranthera_bigelovii\nMojave_aster, Machaeranthera_tortifoloia\ntarweed\nsweet_false_chamomile, wild_chamomile, German_chamomile, Matricaria_recutita, Matricaria_chamomilla\npineapple_weed, rayless_chamomile, Matricaria_matricarioides\nclimbing_hempweed, climbing_boneset, wild_climbing_hempweed, climbing_hemp-vine, Mikania_scandens\nmutisia\nrattlesnake_root\nwhite_lettuce, cankerweed, Nabalus_alba, Prenanthes_alba\ndaisybush, daisy-bush, daisy_bush\nNew_Zealand_daisybush, Olearia_haastii\ncotton_thistle, woolly_thistle, Scotch_thistle, Onopordum_acanthium, Onopordon_acanthium\nothonna\ncascade_everlasting, 
Ozothamnus_secundiflorus, Helichrysum_secundiflorum\nbutterweed\nAmerican_feverfew, wild_quinine, prairie_dock, Parthenium_integrifolium\ncineraria, Pericallis_cruenta, Senecio_cruentus\nflorest's_cineraria, Pericallis_hybrida\nbutterbur, bog_rhubarb, Petasites_hybridus, Petasites_vulgaris\nwinter_heliotrope, sweet_coltsfoot, Petasites_fragrans\nsweet_coltsfoot, Petasites_sagitattus\noxtongue, bristly_oxtongue, bitterweed, bugloss, Picris_echioides\nhawkweed\nmouse-ear_hawkweed, Pilosella_officinarum, Hieracium_pilocella\nstevia\nrattlesnake_root, Prenanthes_purpurea\nfleabane, feabane_mullet, Pulicaria_dysenterica\nsheep_plant, vegetable_sheep, Raoulia_lutescens, Raoulia_australis\nconeflower\nMexican_hat, Ratibida_columnaris\nlong-head_coneflower, prairie_coneflower, Ratibida_columnifera\nprairie_coneflower, Ratibida_tagetes\nSwan_River_everlasting, rhodanthe, Rhodanthe_manglesii, Helipterum_manglesii\nconeflower\nblack-eyed_Susan, Rudbeckia_hirta, Rudbeckia_serotina\ncutleaved_coneflower, Rudbeckia_laciniata\ngolden_glow, double_gold, hortensia, Rudbeckia_laciniata_hortensia\nlavender_cotton, Santolina_chamaecyparissus\ncreeping_zinnia, Sanvitalia_procumbens\ngolden_thistle\nSpanish_oyster_plant, Scolymus_hispanicus\nnodding_groundsel, Senecio_bigelovii\ndusty_miller, Senecio_cineraria, Cineraria_maritima\nbutterweed, ragwort, Senecio_glabellus\nragwort, tansy_ragwort, ragweed, benweed, Senecio_jacobaea\narrowleaf_groundsel, Senecio_triangularis\nblack_salsify, viper's_grass, scorzonera, Scorzonera_hispanica\nwhite-topped_aster\nnarrow-leaved_white-topped_aster\nsilver_sage, silver_sagebrush, grey_sage, gray_sage, Seriphidium_canum, Artemisia_cana\nsea_wormwood, Seriphidium_maritimum, Artemisia_maritima\nsawwort, Serratula_tinctoria\nrosinweed, Silphium_laciniatum\nmilk_thistle, lady's_thistle, Our_Lady's_mild_thistle, holy_thistle, blessed_thistle, Silybum_marianum\ngoldenrod\nsilverrod, Solidago_bicolor\nmeadow_goldenrod, Canadian_goldenrod, 
Solidago_canadensis\nMissouri_goldenrod, Solidago_missouriensis\nalpine_goldenrod, Solidago_multiradiata\ngrey_goldenrod, gray_goldenrod, Solidago_nemoralis\nBlue_Mountain_tea, sweet_goldenrod, Solidago_odora\ndyer's_weed, Solidago_rugosa\nseaside_goldenrod, beach_goldenrod, Solidago_sempervirens\nnarrow_goldenrod, Solidago_spathulata\nBoott's_goldenrod\nElliott's_goldenrod\nOhio_goldenrod\nrough-stemmed_goldenrod\nshowy_goldenrod\ntall_goldenrod\nzigzag_goldenrod, broad_leaved_goldenrod\nsow_thistle, milk_thistle\nmilkweed, Sonchus_oleraceus\nstevia\nstokes'_aster, cornflower_aster, Stokesia_laevis\nmarigold\nAfrican_marigold, big_marigold, Aztec_marigold, Tagetes_erecta\nFrench_marigold, Tagetes_patula\npainted_daisy, pyrethrum, Tanacetum_coccineum, Chrysanthemum_coccineum\npyrethrum, Dalmatian_pyrethrum, Dalmatia_pyrethrum, Tanacetum_cinerariifolium, Chrysanthemum_cinerariifolium\nnorthern_dune_tansy, Tanacetum_douglasii\nfeverfew, Tanacetum_parthenium, Chrysanthemum_parthenium\ndusty_miller, silver-lace, silver_lace, Tanacetum_ptarmiciflorum, Chrysanthemum_ptarmiciflorum\ntansy, golden_buttons, scented_fern, Tanacetum_vulgare\ndandelion, blowball\ncommon_dandelion, Taraxacum_ruderalia, Taraxacum_officinale\ndandelion_green\nRussian_dandelion, kok-saghyz, kok-sagyz, Taraxacum_kok-saghyz\nstemless_hymenoxys, Tetraneuris_acaulis, Hymenoxys_acaulis\nMexican_sunflower, tithonia\nEaster_daisy, stemless_daisy, Townsendia_Exscapa\nyellow_salsify, Tragopogon_dubius\nsalsify, oyster_plant, vegetable_oyster, Tragopogon_porrifolius\nmeadow_salsify, goatsbeard, shepherd's_clock, Tragopogon_pratensis\nscentless_camomile, scentless_false_camomile, scentless_mayweed, scentless_hayweed, corn_mayweed, Tripleurospermum_inodorum, Matricaria_inodorum\nturfing_daisy, Tripleurospermum_tchihatchewii, Matricaria_tchihatchewii\ncoltsfoot, Tussilago_farfara\nursinia\ncrownbeard, crown-beard, crown_beard\nwingstem, golden_ironweed, yellow_ironweed, golden_honey_plant, 
Verbesina_alternifolia, Actinomeris_alternifolia\ncowpen_daisy, golden_crownbeard, golden_crown_beard, butter_daisy, Verbesina_encelioides, Ximenesia_encelioides\ngravelweed, Verbesina_helianthoides\nVirginia_crownbeard, frostweed, frost-weed, Verbesina_virginica\nironweed, vernonia\nmule's_ears, Wyethia_amplexicaulis\nwhite-rayed_mule's_ears, Wyethia_helianthoides\ncocklebur, cockle-bur, cockleburr, cockle-burr\nxeranthemum\nimmortelle, Xeranthemum_annuum\nzinnia, old_maid, old_maid_flower\nwhite_zinnia, Zinnia_acerosa\nlittle_golden_zinnia, Zinnia_grandiflora\nblazing_star, Mentzelia_livicaulis, Mentzelia_laevicaulis\nbartonia, Mentzelia_lindleyi\nachene\nsamara, key_fruit, key\ncampanula, bellflower\ncreeping_bellflower, Campanula_rapunculoides\nCanterbury_bell, cup_and_saucer, Campanula_medium\ntall_bellflower, Campanula_americana\nmarsh_bellflower, Campanula_aparinoides\nclustered_bellflower, Campanula_glomerata\npeach_bells, peach_bell, willow_bell, Campanula_persicifolia\nchimney_plant, chimney_bellflower, Campanula_pyramidalis\nrampion, rampion_bellflower, Campanula_rapunculus\ntussock_bellflower, spreading_bellflower, Campanula_carpatica\norchid, orchidaceous_plant\norchis\nmale_orchis, early_purple_orchid, Orchis_mascula\nbutterfly_orchid, butterfly_orchis, Orchis_papilionaceae\nshowy_orchis, purple_orchis, purple-hooded_orchis, Orchis_spectabilis\naerides\nangrecum\njewel_orchid\nputtyroot, adam-and-eve, Aplectrum_hyemale\narethusa\nbog_rose, wild_pink, dragon's_mouth, Arethusa_bulbosa\nbletia\nBletilla_striata, Bletia_striata\nbrassavola\nspider_orchid, Brassia_lawrenceana\nspider_orchid, Brassia_verrucosa\ncaladenia\ncalanthe\ngrass_pink, Calopogon_pulchellum, Calopogon_tuberosum\ncalypso, fairy-slipper, Calypso_bulbosa\ncattleya\nhelleborine\nred_helleborine, Cephalanthera_rubra\nspreading_pogonia, funnel-crest_rosebud_orchid, Cleistes_divaricata, Pogonia_divaricata\nrosebud_orchid, Cleistes_rosea, Pogonia_rosea\nsatyr_orchid, 
Coeloglossum_bracteatum\nfrog_orchid, Coeloglossum_viride\ncoelogyne\ncoral_root\nspotted_coral_root, Corallorhiza_maculata\nstriped_coral_root, Corallorhiza_striata\nearly_coral_root, pale_coral_root, Corallorhiza_trifida\nswan_orchid, swanflower, swan-flower, swanneck, swan-neck\ncymbid, cymbidium\ncypripedia\nlady's_slipper, lady-slipper, ladies'_slipper, slipper_orchid\nmoccasin_flower, nerveroot, Cypripedium_acaule\ncommon_lady's-slipper, showy_lady's-slipper, showy_lady_slipper, Cypripedium_reginae, Cypripedium_album\nram's-head, ram's-head_lady's_slipper, Cypripedium_arietinum\nyellow_lady's_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum\nlarge_yellow_lady's_slipper, Cypripedium_calceolus_pubescens\nCalifornia_lady's_slipper, Cypripedium_californicum\nclustered_lady's_slipper, Cypripedium_fasciculatum\nmountain_lady's_slipper, Cypripedium_montanum\nmarsh_orchid\ncommon_spotted_orchid, Dactylorhiza_fuchsii, Dactylorhiza_maculata_fuchsii\ndendrobium\ndisa\nphantom_orchid, snow_orchid, Eburophyton_austinae\ntulip_orchid, Encyclia_citrina, Cattleya_citrina\nbutterfly_orchid, Encyclia_tampensis, Epidendrum_tampense\nbutterfly_orchid, butterfly_orchis, Epidendrum_venosum, Encyclia_venosa\nepidendron\nhelleborine\nEpipactis_helleborine\nstream_orchid, chatterbox, giant_helleborine, Epipactis_gigantea\ntongueflower, tongue-flower\nrattlesnake_plantain, helleborine\nfragrant_orchid, Gymnadenia_conopsea\nshort-spurred_fragrant_orchid, Gymnadenia_odoratissima\nfringed_orchis, fringed_orchid\nfrog_orchid\nrein_orchid, rein_orchis\nbog_rein_orchid, bog_candles, Habenaria_dilatata\nwhite_fringed_orchis, white_fringed_orchid, Habenaria_albiflora\nelegant_Habenaria, Habenaria_elegans\npurple-fringed_orchid, purple-fringed_orchis, Habenaria_fimbriata\ncoastal_rein_orchid, Habenaria_greenei\nHooker's_orchid, Habenaria_hookeri\nragged_orchid, ragged_orchis, ragged-fringed_orchid, green_fringed_orchis, Habenaria_lacera\nprairie_orchid, 
prairie_white-fringed_orchis, Habenaria_leucophaea\nsnowy_orchid, Habenaria_nivea\nround-leaved_rein_orchid, Habenaria_orbiculata\npurple_fringeless_orchid, purple_fringeless_orchis, Habenaria_peramoena\npurple-fringed_orchid, purple-fringed_orchis, Habenaria_psycodes\nAlaska_rein_orchid, Habenaria_unalascensis\ncrested_coral_root, Hexalectris_spicata\nTexas_purple_spike, Hexalectris_warnockii\nlizard_orchid, Himantoglossum_hircinum\nlaelia\nliparis\ntwayblade\nfen_orchid, fen_orchis, Liparis_loeselii\nbroad-leaved_twayblade, Listera_convallarioides\nlesser_twayblade, Listera_cordata\ntwayblade, Listera_ovata\ngreen_adder's_mouth, Malaxis-unifolia, Malaxis_ophioglossoides\nmasdevallia\nmaxillaria\npansy_orchid\nodontoglossum\noncidium, dancing_lady_orchid, butterfly_plant, butterfly_orchid\nbee_orchid, Ophrys_apifera\nfly_orchid, Ophrys_insectifera, Ophrys_muscifera\nspider_orchid\nearly_spider_orchid, Ophrys_sphegodes\nVenus'_slipper, Venus's_slipper, Venus's_shoe\nphaius\nmoth_orchid, moth_plant\nbutterfly_plant, Phalaenopsis_amabilis\nrattlesnake_orchid\nlesser_butterfly_orchid, Platanthera_bifolia, Habenaria_bifolia\ngreater_butterfly_orchid, Platanthera_chlorantha, Habenaria_chlorantha\nprairie_white-fringed_orchid, Platanthera_leucophea\ntangle_orchid\nIndian_crocus\npleurothallis\npogonia\nbutterfly_orchid\nPsychopsis_krameriana, Oncidium_papilio_kramerianum\nPsychopsis_papilio, Oncidium_papilio\nhelmet_orchid, greenhood\nfoxtail_orchid\norange-blossom_orchid, Sarcochilus_falcatus\nsobralia\nladies'_tresses, lady's_tresses\nscrew_augur, Spiranthes_cernua\nhooded_ladies'_tresses, Spiranthes_romanzoffiana\nwestern_ladies'_tresses, Spiranthes_porrifolia\nEuropean_ladies'_tresses, Spiranthes_spiralis\nstanhopea\nstelis\nfly_orchid\nvanda\nblue_orchid, Vanda_caerulea\nvanilla\nvanilla_orchid, Vanilla_planifolia\nyam, yam_plant\nyam\nwhite_yam, water_yam, Dioscorea_alata\ncinnamon_vine, Chinese_yam, Dioscorea_batata\nelephant's-foot, tortoise_plant, 
Hottentot_bread_vine, Hottentot's_bread_vine, Dioscorea_elephantipes\nwild_yam, Dioscorea_paniculata\ncush-cush, Dioscorea_trifida\nblack_bryony, black_bindweed, Tamus_communis\nprimrose, primula\nEnglish_primrose, Primula_vulgaris\ncowslip, paigle, Primula_veris\noxlip, paigle, Primula_elatior\nChinese_primrose, Primula_sinensis\npolyanthus, Primula_polyantha\npimpernel\nscarlet_pimpernel, red_pimpernel, poor_man's_weatherglass, Anagallis_arvensis\nbog_pimpernel, Anagallis_tenella\nchaffweed, bastard_pimpernel, false_pimpernel\ncyclamen, Cyclamen_purpurascens\nsowbread, Cyclamen_hederifolium, Cyclamen_neopolitanum\nsea_milkwort, sea_trifoly, black_saltwort, Glaux_maritima\nfeatherfoil, feather-foil\nwater_gillyflower, American_featherfoil, Hottonia_inflata\nwater_violet, Hottonia_palustris\nloosestrife\ngooseneck_loosestrife, Lysimachia_clethroides_Duby\nyellow_pimpernel, Lysimachia_nemorum\nfringed_loosestrife, Lysimachia_ciliatum\nmoneywort, creeping_Jenny, creeping_Charlie, Lysimachia_nummularia\nswamp_candles, Lysimachia_terrestris\nwhorled_loosestrife, Lysimachia_quadrifolia\nwater_pimpernel\nbrookweed, Samolus_valerandii\nbrookweed, Samolus_parviflorus, Samolus_floribundus\ncoralberry, spiceberry, Ardisia_crenata\nmarlberry, Ardisia_escallonoides, Ardisia_paniculata\nplumbago\nleadwort, Plumbago_europaea\nthrift\nsea_lavender, marsh_rosemary, statice\nbarbasco, joewood, Jacquinia_keyensis\ngramineous_plant, graminaceous_plant\ngrass\nmidgrass\nshortgrass, short-grass\nsword_grass\ntallgrass, tall-grass\nherbage, pasturage\ngoat_grass, Aegilops_triuncalis\nwheatgrass, wheat-grass\ncrested_wheatgrass, crested_wheat_grass, fairway_crested_wheat_grass, Agropyron_cristatum\nbearded_wheatgrass, Agropyron_subsecundum\nwestern_wheatgrass, bluestem_wheatgrass, Agropyron_smithii\nintermediate_wheatgrass, Agropyron_intermedium, Elymus_hispidus\nslender_wheatgrass, Agropyron_trachycaulum, Agropyron_pauciflorum, Elymus_trachycaulos\nvelvet_bent, velvet_bent_grass, 
brown_bent, Rhode_Island_bent, dog_bent, Agrostis_canina\ncloud_grass, Agrostis_nebulosa\nmeadow_foxtail, Alopecurus_pratensis\nfoxtail, foxtail_grass\nbroom_grass\nbroom_sedge, Andropogon_virginicus\ntall_oat_grass, tall_meadow_grass, evergreen_grass, false_oat, French_rye, Arrhenatherum_elatius\ntoetoe, toitoi, Arundo_conspicua, Chionochloa_conspicua\noat\ncereal_oat, Avena_sativa\nwild_oat, wild_oat_grass, Avena_fatua\nslender_wild_oat, Avena_barbata\nwild_red_oat, animated_oat, Avene_sterilis\nbrome, bromegrass\nchess, cheat, Bromus_secalinus\nfield_brome, Bromus_arvensis\ngrama, grama_grass, gramma, gramma_grass\nblack_grama, Bouteloua_eriopoda\nbuffalo_grass, Buchloe_dactyloides\nreed_grass\nfeather_reed_grass, feathertop, Calamagrostis_acutiflora\nAustralian_reed_grass, Calamagrostic_quadriseta\nburgrass, bur_grass\nbuffel_grass, Cenchrus_ciliaris, Pennisetum_cenchroides\nRhodes_grass, Chloris_gayana\npampas_grass, Cortaderia_selloana\ngiant_star_grass, Cynodon_plectostachyum\norchard_grass, cocksfoot, cockspur, Dactylis_glomerata\nEgyptian_grass, crowfoot_grass, Dactyloctenium_aegypticum\ncrabgrass, crab_grass, finger_grass\nsmooth_crabgrass, Digitaria_ischaemum\nlarge_crabgrass, hairy_finger_grass, Digitaria_sanguinalis\nbarnyard_grass, barn_grass, barn_millet, Echinochloa_crusgalli\nJapanese_millet, billion-dollar_grass, Japanese_barnyard_millet, sanwa_millet, Echinochloa_frumentacea\nyardgrass, yard_grass, wire_grass, goose_grass, Eleusine_indica\nfinger_millet, ragi, ragee, African_millet, coracan, corakan, kurakkan, Eleusine_coracana\nlyme_grass\nwild_rye\ngiant_ryegrass, Elymus_condensatus, Leymus_condensatus\nsea_lyme_grass, European_dune_grass, Elymus_arenarius, Leymus_arenaria\nCanada_wild_rye, Elymus_canadensis\nteff, teff_grass, Eragrostis_tef, Eragrostic_abyssinica\nweeping_love_grass, African_love_grass, Eragrostis_curvula\nplume_grass\nRavenna_grass, wool_grass, Erianthus_ravennae\nfescue, fescue_grass, meadow_fescue, 
Festuca_elatior\nreed_meadow_grass, Glyceria_grandis\nvelvet_grass, Yorkshire_fog, Holcus_lanatus\ncreeping_soft_grass, Holcus_mollis\nbarleycorn\nbarley_grass, wall_barley, Hordeum_murinum\nlittle_barley, Hordeum_pusillum\nrye_grass, ryegrass\nperennial_ryegrass, English_ryegrass, Lolium_perenne\nItalian_ryegrass, Italian_rye, Lolium_multiflorum\ndarnel, tare, bearded_darnel, cheat, Lolium_temulentum\nnimblewill, nimble_Will, Muhlenbergia_schreberi\ncultivated_rice, Oryza_sativa\nricegrass, rice_grass\nsmilo, smilo_grass, Oryzopsis_miliacea\nswitch_grass, Panicum_virgatum\nbroomcorn_millet, hog_millet, Panicum_miliaceum\ngoose_grass, Texas_millet, Panicum_Texanum\ndallisgrass, dallis_grass, paspalum, Paspalum_dilatatum\nBahia_grass, Paspalum_notatum\nknotgrass, Paspalum_distichum\nfountain_grass, Pennisetum_ruppelii, Pennisetum_setaceum\nreed_canary_grass, gardener's_garters, lady's_laces, ribbon_grass, Phalaris_arundinacea\ncanary_grass, birdseed_grass, Phalaris_canariensis\ntimothy, herd's_grass, Phleum_pratense\nbluegrass, blue_grass\nmeadowgrass, meadow_grass\nwood_meadowgrass, Poa_nemoralis, Agrostis_alba\nnoble_cane\nmunj, munja, Saccharum_bengalense, Saccharum_munja\nbroom_beard_grass, prairie_grass, wire_grass, Andropogon_scoparius, Schizachyrium_scoparium\nbluestem, blue_stem, Andropogon_furcatus, Andropogon_gerardii\nrye, Secale_cereale\nbristlegrass, bristle_grass\ngiant_foxtail\nyellow_bristlegrass, yellow_bristle_grass, yellow_foxtail, glaucous_bristlegrass, Setaria_glauca\ngreen_bristlegrass, green_foxtail, rough_bristlegrass, bottle-grass, bottle_grass, Setaria_viridis\nSiberian_millet, Setaria_italica_rubrofructa\nGerman_millet, golden_wonder_millet, Setaria_italica_stramineofructa\nmillet\nrattan, rattan_cane\nmalacca\nreed\nsorghum\ngrain_sorghum\ndurra, doura, dourah, Egyptian_corn, Indian_millet, Guinea_corn\nfeterita, federita, Sorghum_vulgare_caudatum\nhegari\nkaoliang\nmilo, milo_maize\nshallu, Sorghum_vulgare_rosburghii\nbroomcorn, 
Sorghum_vulgare_technicum\ncordgrass, cord_grass\nsalt_reed_grass, Spartina_cynosuroides\nprairie_cordgrass, freshwater_cordgrass, slough_grass, Spartina_pectinmata\nsmut_grass, blackseed, carpet_grass, Sporobolus_poiretii\nsand_dropseed, Sporobolus_cryptandrus\nrush_grass, rush-grass\nSt._Augustine_grass, Stenotaphrum_secundatum, buffalo_grass\ngrain\ncereal, cereal_grass\nwheat\nwheat_berry\ndurum, durum_wheat, hard_wheat, Triticum_durum, Triticum_turgidum, macaroni_wheat\nspelt, Triticum_spelta, Triticum_aestivum_spelta\nemmer, starch_wheat, two-grain_spelt, Triticum_dicoccum\nwild_wheat, wild_emmer, Triticum_dicoccum_dicoccoides\ncorn, maize, Indian_corn, Zea_mays\nmealie\ncorn\ndent_corn, Zea_mays_indentata\nflint_corn, flint_maize, Yankee_corn, Zea_mays_indurata\npopcorn, Zea_mays_everta\nzoysia\nManila_grass, Japanese_carpet_grass, Zoysia_matrella\nKorean_lawn_grass, Japanese_lawn_grass, Zoysia_japonica\nbamboo\ncommon_bamboo, Bambusa_vulgaris\ngiant_bamboo, kyo-chiku, Dendrocalamus_giganteus\numbrella_plant, umbrella_sedge, Cyperus_alternifolius\nchufa, yellow_nutgrass, earth_almond, ground_almond, rush_nut, Cyperus_esculentus\ngalingale, galangal, Cyperus_longus\nnutgrass, nut_grass, nutsedge, nut_sedge, Cyperus_rotundus\nsand_sedge, sand_reed, Carex_arenaria\ncypress_sedge, Carex_pseudocyperus\ncotton_grass, cotton_rush\ncommon_cotton_grass, Eriophorum_angustifolium\nhardstem_bulrush, hardstemmed_bulrush, Scirpus_acutus\nwool_grass, Scirpus_cyperinus\nspike_rush\nwater_chestnut, Chinese_water_chestnut, Eleocharis_dulcis\nneedle_spike_rush, needle_rush, slender_spike_rush, hair_grass, Eleocharis_acicularis\ncreeping_spike_rush, Eleocharis_palustris\npandanus, screw_pine\ntextile_screw_pine, lauhala, Pandanus_tectorius\ncattail\ncat's-tail, bullrush, bulrush, nailrod, reed_mace, reedmace, Typha_latifolia\nbur_reed\ngrain, caryopsis\nkernel\nrye\ngourd, gourd_vine\ngourd\npumpkin, pumpkin_vine, autumn_pumpkin, Cucurbita_pepo\nsquash, 
squash_vine\nsummer_squash, summer_squash_vine, Cucurbita_pepo_melopepo\nyellow_squash\nmarrow, marrow_squash, vegetable_marrow\nzucchini, courgette\ncocozelle, Italian_vegetable_marrow\ncymling, pattypan_squash\nspaghetti_squash\nwinter_squash, winter_squash_plant\nacorn_squash\nhubbard_squash, Cucurbita_maxima\nturban_squash, Cucurbita_maxima_turbaniformis\nbuttercup_squash\nbutternut_squash, Cucurbita_maxima\nwinter_crookneck, winter_crookneck_squash, Cucurbita_moschata\ncushaw, Cucurbita_mixta, Cucurbita_argyrosperma\nprairie_gourd, prairie_gourd_vine, Missouri_gourd, wild_pumpkin, buffalo_gourd, calabazilla, Cucurbita_foetidissima\nprairie_gourd\nbryony, briony\nwhite_bryony, devil's_turnip, Bryonia_alba\nsweet_melon, muskmelon, sweet_melon_vine, Cucumis_melo\ncantaloupe, cantaloup, cantaloupe_vine, cantaloup_vine, Cucumis_melo_cantalupensis\nwinter_melon, Persian_melon, honeydew_melon, winter_melon_vine, Cucumis_melo_inodorus\nnet_melon, netted_melon, nutmeg_melon, Cucumis_melo_reticulatus\ncucumber, cucumber_vine, Cucumis_sativus\nsquirting_cucumber, exploding_cucumber, touch-me-not, Ecballium_elaterium\nbottle_gourd, calabash, Lagenaria_siceraria\nluffa, dishcloth_gourd, sponge_gourd, rag_gourd, strainer_vine\nloofah, vegetable_sponge, Luffa_cylindrica\nangled_loofah, sing-kwa, Luffa_acutangula\nloofa, loofah, luffa, loufah_sponge\nbalsam_apple, Momordica_balsamina\nbalsam_pear, Momordica_charantia\nlobelia\nwater_lobelia, Lobelia_dortmanna\nmallow\nmusk_mallow, mus_rose, Malva_moschata\ncommon_mallow, Malva_neglecta\nokra, gumbo, okra_plant, lady's-finger, Abelmoschus_esculentus, Hibiscus_esculentus\nokra\nabelmosk, musk_mallow, Abelmoschus_moschatus, Hibiscus_moschatus\nflowering_maple\nvelvetleaf, velvet-leaf, velvetweed, Indian_mallow, butter-print, China_jute, Abutilon_theophrasti\nhollyhock\nrose_mallow, Alcea_rosea, Althea_rosea\nalthea, althaea, hollyhock\nmarsh_mallow, white_mallow, Althea_officinalis\npoppy_mallow\nfringed_poppy_mallow, 
Callirhoe_digitata\npurple_poppy_mallow, Callirhoe_involucrata\nclustered_poppy_mallow, Callirhoe_triangulata\nsea_island_cotton, tree_cotton, Gossypium_barbadense\nLevant_cotton, Gossypium_herbaceum\nupland_cotton, Gossypium_hirsutum\nPeruvian_cotton, Gossypium_peruvianum\nwild_cotton, Arizona_wild_cotton, Gossypium_thurberi\nkenaf, kanaf, deccan_hemp, bimli, bimli_hemp, Indian_hemp, Bombay_hemp, Hibiscus_cannabinus\nsorrel_tree, Hibiscus_heterophyllus\nrose_mallow, swamp_mallow, common_rose_mallow, swamp_rose_mallow, Hibiscus_moscheutos\ncotton_rose, Confederate_rose, Confederate_rose_mallow, Hibiscus_mutabilis\nroselle, rozelle, sorrel, red_sorrel, Jamaica_sorrel, Hibiscus_sabdariffa\nmahoe, majagua, mahagua, balibago, purau, Hibiscus_tiliaceus\nflower-of-an-hour, flowers-of-an-hour, bladder_ketmia, black-eyed_Susan, Hibiscus_trionum\nlacebark, ribbonwood, houhere, Hoheria_populnea\nwild_hollyhock, Iliamna_remota, Sphaeralcea_remota\nmountain_hollyhock, Iliamna_ruvularis, Iliamna_acerifolia\nseashore_mallow\nsalt_marsh_mallow, Kosteletzya_virginica\nchaparral_mallow, Malacothamnus_fasciculatus, Sphaeralcea_fasciculata\nmalope, Malope_trifida\nfalse_mallow\nwaxmallow, wax_mallow, sleeping_hibiscus\nglade_mallow, Napaea_dioica\npavonia\nribbon_tree, ribbonwood, Plagianthus_regius, Plagianthus_betulinus\nbush_hibiscus, Radyera_farragei, Hibiscus_farragei\nVirginia_mallow, Sida_hermaphrodita\nQueensland_hemp, jellyleaf, Sida_rhombifolia\nIndian_mallow, Sida_spinosa\ncheckerbloom, wild_hollyhock, Sidalcea_malviflora\nglobe_mallow, false_mallow\nprairie_mallow, red_false_mallow, Sphaeralcea_coccinea, Malvastrum_coccineum\ntulipwood_tree\nportia_tree, bendy_tree, seaside_mahoe, Thespesia_populnea\nred_silk-cotton_tree, simal, Bombax_ceiba, Bombax_malabarica\ncream-of-tartar_tree, sour_gourd, Adansonia_gregorii\nbaobab, monkey-bread_tree, Adansonia_digitata\nkapok, ceiba_tree, silk-cotton_tree, white_silk-cotton_tree, Bombay_ceiba, God_tree, Ceiba_pentandra\ndurian, 
durion, durian_tree, Durio_zibethinus\nMontezuma\nshaving-brush_tree, Pseudobombax_ellipticum\nquandong, quandong_tree, Brisbane_quandong, silver_quandong_tree, blue_fig, Elaeocarpus_grandis\nquandong, blue_fig\nmakomako, New_Zealand_wine_berry, wineberry, Aristotelia_serrata, Aristotelia_racemosa\nJamaican_cherry, calabur_tree, calabura, silk_wood, silkwood, Muntingia_calabura\nbreakax, breakaxe, break-axe, Sloanea_jamaicensis\nsterculia\nPanama_tree, Sterculia_apetala\nkalumpang, Java_olives, Sterculia_foetida\nbottle-tree, bottle_tree\nflame_tree, flame_durrajong, Brachychiton_acerifolius, Sterculia_acerifolia\nflame_tree, broad-leaved_bottletree, Brachychiton_australis\nkurrajong, currajong, Brachychiton_populneus\nQueensland_bottletree, narrow-leaved_bottletree, Brachychiton_rupestris, Sterculia_rupestris\nkola, kola_nut, kola_nut_tree, goora_nut, Cola_acuminata\nkola_nut, cola_nut\nChinese_parasol_tree, Chinese_parasol, Japanese_varnish_tree, phoenix_tree, Firmiana_simplex\nflannelbush, flannel_bush, California_beauty\nscrew_tree\nnut-leaved_screw_tree, Helicteres_isora\nred_beech, brown_oak, booyong, crow's_foot, stave_wood, silky_elm, Heritiera_trifoliolata, Terrietia_trifoliolata\nlooking_glass_tree, Heritiera_macrophylla\nlooking-glass_plant, Heritiera_littoralis\nhoney_bell, honeybells, Hermannia_verticillata, Mahernia_verticillata\nmayeng, maple-leaved_bayur, Pterospermum_acerifolium\nsilver_tree, Tarrietia_argyrodendron\ncacao, cacao_tree, chocolate_tree, Theobroma_cacao\nobeche, obechi, arere, samba, Triplochiton_scleroxcylon\nlinden, linden_tree, basswood, lime, lime_tree\nAmerican_basswood, American_lime, Tilia_americana\nsmall-leaved_linden, small-leaved_lime, Tilia_cordata\nwhite_basswood, cottonwood, Tilia_heterophylla\nJapanese_linden, Japanese_lime, Tilia_japonica\nsilver_lime, silver_linden, Tilia_tomentosa\ncorchorus\nAfrican_hemp, Sparmannia_africana\nherb, herbaceous_plant\nprotea\nhoneypot, king_protea, Protea_cynaroides\nhoneyflower, 
honey-flower, Protea_mellifera\nbanksia\nhoneysuckle, Australian_honeysuckle, coast_banksia, Banksia_integrifolia\nsmoke_bush\nChilean_firebush, Chilean_flameflower, Embothrium_coccineum\nChilean_nut, Chile_nut, Chile_hazel, Chilean_hazelnut, Guevina_heterophylla, Guevina_avellana\ngrevillea\nred-flowered_silky_oak, Grevillea_banksii\nsilky_oak, Grevillea_robusta\nbeefwood, Grevillea_striata\ncushion_flower, pincushion_hakea, Hakea_laurina\nrewa-rewa, New_Zealand_honeysuckle\nhoneyflower, honey-flower, mountain_devil, Lambertia_formosa\nsilver_tree, Leucadendron_argenteum\nlomatia\nmacadamia, macadamia_tree\nMacadamia_integrifolia\nmacadamia_nut, macadamia_nut_tree, Macadamia_ternifolia\nQueensland_nut, Macadamia_tetraphylla\nprickly_ash, Orites_excelsa\ngeebung\nwheel_tree, firewheel_tree, Stenocarpus_sinuatus\nscrub_beefwood, beefwood, Stenocarpus_salignus\nwaratah, Telopea_Oreades\nwaratah, Telopea_speciosissima\ncasuarina\nshe-oak\nbeefwood\nAustralian_pine, Casuarina_equisetfolia\nheath\ntree_heath, briar, brier, Erica_arborea\nbriarroot\nwinter_heath, spring_heath, Erica_carnea\nbell_heather, heather_bell, fine-leaved_heath, Erica_cinerea\nCornish_heath, Erica_vagans\nSpanish_heath, Portuguese_heath, Erica_lusitanica\nPrince-of-Wales'-heath, Prince_of_Wales_heath, Erica_perspicua\nbog_rosemary, moorwort, Andromeda_glaucophylla\nmarsh_andromeda, common_bog_rosemary, Andromeda_polifolia\nmadrona, madrono, manzanita, Arbutus_menziesii\nstrawberry_tree, Irish_strawberry, Arbutus_unedo\nbearberry\nalpine_bearberry, black_bearberry, Arctostaphylos_alpina\nheartleaf_manzanita, Arctostaphylos_andersonii\nParry_manzanita, Arctostaphylos_manzanita\nspike_heath, Bruckenthalia_spiculifolia\nbryanthus\nleatherleaf, Chamaedaphne_calyculata\nConnemara_heath, St._Dabeoc's_heath, Daboecia_cantabrica\ntrailing_arbutus, mayflower, Epigaea_repens\ncreeping_snowberry, moxie_plum, maidenhair_berry, Gaultheria_hispidula\nsalal, shallon, 
Gaultheria_shallon\nhuckleberry\nblack_huckleberry, Gaylussacia_baccata\ndangleberry, dangle-berry, Gaylussacia_frondosa\nbox_huckleberry, Gaylussacia_brachycera\nkalmia\nmountain_laurel, wood_laurel, American_laurel, calico_bush, Kalmia_latifolia\nswamp_laurel, bog_laurel, bog_kalmia, Kalmia_polifolia\ntrapper's_tea, glandular_Labrador_tea\nwild_rosemary, marsh_tea, Ledum_palustre\nsand_myrtle, Leiophyllum_buxifolium\nleucothoe\ndog_laurel, dog_hobble, switch-ivy, Leucothoe_fontanesiana, Leucothoe_editorum\nsweet_bells, Leucothoe_racemosa\nalpine_azalea, mountain_azalea, Loiseleuria_procumbens\nstaggerbush, stagger_bush, Lyonia_mariana\nmaleberry, male_berry, privet_andromeda, he-huckleberry, Lyonia_ligustrina\nfetterbush, fetter_bush, shiny_lyonia, Lyonia_lucida\nfalse_azalea, fool's_huckleberry, Menziesia_ferruginea\nminniebush, minnie_bush, Menziesia_pilosa\nsorrel_tree, sourwood, titi, Oxydendrum_arboreum\nmountain_heath, Phyllodoce_caerulea, Bryanthus_taxifolius\npurple_heather, Brewer's_mountain_heather, Phyllodoce_breweri\nfetterbush, mountain_fetterbush, mountain_andromeda, Pieris_floribunda\nrhododendron\ncoast_rhododendron, Rhododendron_californicum\nrosebay, Rhododendron_maxima\nswamp_azalea, swamp_honeysuckle, white_honeysuckle, Rhododendron_viscosum\nazalea\ncranberry\nAmerican_cranberry, large_cranberry, Vaccinium_macrocarpon\nEuropean_cranberry, small_cranberry, Vaccinium_oxycoccus\nblueberry, blueberry_bush\nfarkleberry, sparkleberry, Vaccinium_arboreum\nlow-bush_blueberry, low_blueberry, Vaccinium_angustifolium, Vaccinium_pennsylvanicum\nrabbiteye_blueberry, rabbit-eye_blueberry, rabbiteye, Vaccinium_ashei\ndwarf_bilberry, dwarf_blueberry, Vaccinium_caespitosum\nevergreen_blueberry, Vaccinium_myrsinites\nevergreen_huckleberry, Vaccinium_ovatum\nbilberry, thin-leaved_bilberry, mountain_blue_berry, Viccinium_membranaceum\nbilberry, whortleberry, whinberry, blaeberry, Viccinium_myrtillus\nbog_bilberry, bog_whortleberry, moor_berry, 
Vaccinium_uliginosum_alpinum\ndryland_blueberry, dryland_berry, Vaccinium_pallidum\ngrouseberry, grouse-berry, grouse_whortleberry, Vaccinium_scoparium\ndeerberry, squaw_huckleberry, Vaccinium_stamineum\ncowberry, mountain_cranberry, lingonberry, lingenberry, lingberry, foxberry, Vaccinium_vitis-idaea\ndiapensia\ngalax, galaxy, wandflower, beetleweed, coltsfoot, Galax_urceolata\npyxie, pixie, pixy, Pyxidanthera_barbulata\nshortia\noconee_bells, Shortia_galacifolia\nAustralian_heath\nepacris\ncommon_heath, Epacris_impressa\ncommon_heath, blunt-leaf_heath, Epacris_obtusifolia\nPort_Jackson_heath, Epacris_purpurascens\nnative_cranberry, groundberry, ground-berry, cranberry_heath, Astroloma_humifusum, Styphelia_humifusum\npink_fivecorner, Styphelia_triflora\nwintergreen, pyrola\nfalse_wintergreen, Pyrola_americana, Pyrola_rotundifolia_americana\nlesser_wintergreen, Pyrola_minor\nwild_lily_of_the_valley, shinleaf, Pyrola_elliptica\nwild_lily_of_the_valley, Pyrola_rotundifolia\npipsissewa, prince's_pine\nlove-in-winter, western_prince's_pine, Chimaphila_umbellata, Chimaphila_corymbosa\none-flowered_wintergreen, one-flowered_pyrola, Moneses_uniflora, Pyrola_uniflora\nIndian_pipe, waxflower, Monotropa_uniflora\npinesap, false_beachdrops, Monotropa_hypopithys\nbeech, beech_tree\ncommon_beech, European_beech, Fagus_sylvatica\ncopper_beech, purple_beech, Fagus_sylvatica_atropunicea, Fagus_purpurea, Fagus_sylvatica_purpurea\nAmerican_beech, white_beech, red_beech, Fagus_grandifolia, Fagus_americana\nweeping_beech, Fagus_pendula, Fagus_sylvatica_pendula\nJapanese_beech\nchestnut, chestnut_tree\nAmerican_chestnut, American_sweet_chestnut, Castanea_dentata\nEuropean_chestnut, sweet_chestnut, Spanish_chestnut, Castanea_sativa\nChinese_chestnut, Castanea_mollissima\nJapanese_chestnut, Castanea_crenata\nAllegheny_chinkapin, eastern_chinquapin, chinquapin, dwarf_chestnut, Castanea_pumila\nOzark_chinkapin, Ozark_chinquapin, chinquapin, 
Castanea_ozarkensis\noak_chestnut\ngiant_chinkapin, golden_chinkapin, Chrysolepis_chrysophylla, Castanea_chrysophylla, Castanopsis_chrysophylla\ndwarf_golden_chinkapin, Chrysolepis_sempervirens\ntanbark_oak, Lithocarpus_densiflorus\nJapanese_oak, Lithocarpus_glabra, Lithocarpus_glaber\nsouthern_beech, evergreen_beech\nmyrtle_beech, Nothofagus_cuninghamii\nCoigue, Nothofagus_dombeyi\nNew_Zealand_beech\nsilver_beech, Nothofagus_menziesii\nroble_beech, Nothofagus_obliqua\nrauli_beech, Nothofagus_procera\nblack_beech, Nothofagus_solanderi\nhard_beech, Nothofagus_truncata\nacorn\ncupule, acorn_cup\noak, oak_tree\nlive_oak\ncoast_live_oak, California_live_oak, Quercus_agrifolia\nwhite_oak\nAmerican_white_oak, Quercus_alba\nArizona_white_oak, Quercus_arizonica\nswamp_white_oak, swamp_oak, Quercus_bicolor\nEuropean_turkey_oak, turkey_oak, Quercus_cerris\ncanyon_oak, canyon_live_oak, maul_oak, iron_oak, Quercus_chrysolepis\nscarlet_oak, Quercus_coccinea\njack_oak, northern_pin_oak, Quercus_ellipsoidalis\nred_oak\nsouthern_red_oak, swamp_red_oak, turkey_oak, Quercus_falcata\nOregon_white_oak, Oregon_oak, Garry_oak, Quercus_garryana\nholm_oak, holm_tree, holly-leaved_oak, evergreen_oak, Quercus_ilex\nbear_oak, Quercus_ilicifolia\nshingle_oak, laurel_oak, Quercus_imbricaria\nbluejack_oak, turkey_oak, Quercus_incana\nCalifornia_black_oak, Quercus_kelloggii\nAmerican_turkey_oak, turkey_oak, Quercus_laevis\nlaurel_oak, pin_oak, Quercus_laurifolia\nCalifornia_white_oak, valley_oak, valley_white_oak, roble, Quercus_lobata\novercup_oak, Quercus_lyrata\nbur_oak, burr_oak, mossy-cup_oak, mossycup_oak, Quercus_macrocarpa\nscrub_oak\nblackjack_oak, blackjack, jack_oak, Quercus_marilandica\nswamp_chestnut_oak, Quercus_michauxii\nJapanese_oak, Quercus_mongolica, Quercus_grosseserrata\nchestnut_oak\nchinquapin_oak, chinkapin_oak, yellow_chestnut_oak, Quercus_muehlenbergii\nmyrtle_oak, seaside_scrub_oak, Quercus_myrtifolia\nwater_oak, possum_oak, Quercus_nigra\nNuttall_oak, Nuttall's_oak, 
Quercus_nuttalli\ndurmast, Quercus_petraea, Quercus_sessiliflora\nbasket_oak, cow_oak, Quercus_prinus, Quercus_montana\npin_oak, swamp_oak, Quercus_palustris\nwillow_oak, Quercus_phellos\ndwarf_chinkapin_oak, dwarf_chinquapin_oak, dwarf_oak, Quercus_prinoides\ncommon_oak, English_oak, pedunculate_oak, Quercus_robur\nnorthern_red_oak, Quercus_rubra, Quercus_borealis\nShumard_oak, Shumard_red_oak, Quercus_shumardii\npost_oak, box_white_oak, brash_oak, iron_oak, Quercus_stellata\ncork_oak, Quercus_suber\nSpanish_oak, Quercus_texana\nhuckleberry_oak, Quercus_vaccinifolia\nChinese_cork_oak, Quercus_variabilis\nblack_oak, yellow_oak, quercitron, quercitron_oak, Quercus_velutina\nsouthern_live_oak, Quercus_virginiana\ninterior_live_oak, Quercus_wislizenii, Quercus_wizlizenii\nmast\nbirch, birch_tree\nyellow_birch, Betula_alleghaniensis, Betula_leutea\nAmerican_white_birch, paper_birch, paperbark_birch, canoe_birch, Betula_cordifolia, Betula_papyrifera\ngrey_birch, gray_birch, American_grey_birch, American_gray_birch, Betula_populifolia\nsilver_birch, common_birch, European_white_birch, Betula_pendula\ndowny_birch, white_birch, Betula_pubescens\nblack_birch, river_birch, red_birch, Betula_nigra\nsweet_birch, cherry_birch, black_birch, Betula_lenta\nYukon_white_birch, Betula_neoalaskana\nswamp_birch, water_birch, mountain_birch, Western_paper_birch, Western_birch, Betula_fontinalis\nNewfoundland_dwarf_birch, American_dwarf_birch, Betula_glandulosa\nalder, alder_tree\ncommon_alder, European_black_alder, Alnus_glutinosa, Alnus_vulgaris\ngrey_alder, gray_alder, Alnus_incana\nseaside_alder, Alnus_maritima\nwhite_alder, mountain_alder, Alnus_rhombifolia\nred_alder, Oregon_alder, Alnus_rubra\nspeckled_alder, Alnus_rugosa\nsmooth_alder, hazel_alder, Alnus_serrulata\ngreen_alder, Alnus_veridis\ngreen_alder, Alnus_veridis_crispa, Alnus_crispa\nhornbeam\nEuropean_hornbeam, Carpinus_betulus\nAmerican_hornbeam, Carpinus_caroliniana\nhop_hornbeam\nOld_World_hop_hornbeam, 
Ostrya_carpinifolia\nEastern_hop_hornbeam, ironwood, ironwood_tree, Ostrya_virginiana\nhazelnut, hazel, hazelnut_tree\nAmerican_hazel, Corylus_americana\ncobnut, filbert, Corylus_avellana, Corylus_avellana_grandis\nbeaked_hazelnut, Corylus_cornuta\ncentaury\nrosita, Centaurium_calycosum\nlesser_centaury, Centaurium_minus\nseaside_centaury\nslender_centaury\nprairie_gentian, tulip_gentian, bluebell, Eustoma_grandiflorum\nPersian_violet, Exacum_affine\ncolumbo, American_columbo, deer's-ear, deer's-ears, pyramid_plant, American_gentian\ngentian\ngentianella, Gentiana_acaulis\nclosed_gentian, blind_gentian, bottle_gentian, Gentiana_andrewsii\nexplorer's_gentian, Gentiana_calycosa\nclosed_gentian, blind_gentian, Gentiana_clausa\ngreat_yellow_gentian, Gentiana_lutea\nmarsh_gentian, calathian_violet, Gentiana_pneumonanthe\nsoapwort_gentian, Gentiana_saponaria\nstriped_gentian, Gentiana_villosa\nagueweed, ague_weed, five-flowered_gentian, stiff_gentian, Gentianella_quinquefolia, Gentiana_quinquefolia\nfelwort, gentianella_amarella\nfringed_gentian\nGentianopsis_crinita, Gentiana_crinita\nGentianopsis_detonsa, Gentiana_detonsa\nGentianopsid_procera, Gentiana_procera\nGentianopsis_thermalis, Gentiana_thermalis\ntufted_gentian, Gentianopsis_holopetala, Gentiana_holopetala\nspurred_gentian\nsabbatia\ntoothbrush_tree, mustard_tree, Salvadora_persica\nolive_tree\nolive, European_olive_tree, Olea_europaea\nolive\nblack_maire, Olea_cunninghamii\nwhite_maire, Olea_lanceolata\nfringe_tree\nfringe_bush, Chionanthus_virginicus\nforestiera\nforsythia\nash, ash_tree\nwhite_ash, Fraxinus_Americana\nswamp_ash, Fraxinus_caroliniana\nflowering_ash, Fraxinus_cuspidata\nEuropean_ash, common_European_ash, Fraxinus_excelsior\nOregon_ash, Fraxinus_latifolia, Fraxinus_oregona\nblack_ash, basket_ash, brown_ash, hoop_ash, Fraxinus_nigra\nmanna_ash, flowering_ash, Fraxinus_ornus\nred_ash, downy_ash, Fraxinus_pennsylvanica\ngreen_ash, Fraxinus_pennsylvanica_subintegerrima\nblue_ash, 
Fraxinus_quadrangulata\nmountain_ash, Fraxinus_texensis\npumpkin_ash, Fraxinus_tomentosa\nArizona_ash, Fraxinus_velutina\njasmine\nprimrose_jasmine, Jasminum_mesnyi\nwinter_jasmine, Jasminum_nudiflorum\ncommon_jasmine, true_jasmine, jessamine, Jasminum_officinale\nprivet\nAmur_privet, Ligustrum_amurense\nJapanese_privet, Ligustrum_japonicum\nLigustrum_obtusifolium\ncommon_privet, Ligustrum_vulgare\ndevilwood, American_olive, Osmanthus_americanus\nmock_privet\nlilac\nHimalayan_lilac, Syringa_emodi\nPersian_lilac, Syringa_persica\nJapanese_tree_lilac, Syringa_reticulata, Syringa_amurensis_japonica\nJapanese_lilac, Syringa_villosa\ncommon_lilac, Syringa_vulgaris\nbloodwort\nkangaroo_paw, kangaroo's_paw, kangaroo's-foot, kangaroo-foot_plant, Australian_sword_lily, Anigozanthus_manglesii\nVirginian_witch_hazel, Hamamelis_virginiana\nvernal_witch_hazel, Hamamelis_vernalis\nwinter_hazel, flowering_hazel\nfothergilla, witch_alder\nliquidambar\nsweet_gum, sweet_gum_tree, bilsted, red_gum, American_sweet_gum, Liquidambar_styraciflua\niron_tree, iron-tree, ironwood, ironwood_tree\nwalnut, walnut_tree\nCalifornia_black_walnut, Juglans_californica\nbutternut, butternut_tree, white_walnut, Juglans_cinerea\nblack_walnut, black_walnut_tree, black_hickory, Juglans_nigra\nEnglish_walnut, English_walnut_tree, Circassian_walnut, Persian_walnut, Juglans_regia\nhickory, hickory_tree\nwater_hickory, bitter_pecan, water_bitternut, Carya_aquatica\npignut, pignut_hickory, brown_hickory, black_hickory, Carya_glabra\nbitternut, bitternut_hickory, bitter_hickory, bitter_pignut, swamp_hickory, Carya_cordiformis\npecan, pecan_tree, Carya_illinoensis, Carya_illinoinsis\nbig_shellbark, big_shellbark_hickory, big_shagbark, king_nut, king_nut_hickory, Carya_laciniosa\nnutmeg_hickory, Carya_myristicaeformis, Carya_myristiciformis\nshagbark, shagbark_hickory, shellbark, shellbark_hickory, Carya_ovata\nmockernut, mockernut_hickory, black_hickory, white-heart_hickory, big-bud_hickory, 
Carya_tomentosa\nwing_nut, wing-nut\nCaucasian_walnut, Pterocarya_fraxinifolia\ndhawa, dhava\ncombretum\nhiccup_nut, hiccough_nut, Combretum_bracteosum\nbush_willow, Combretum_appiculatum\nbush_willow, Combretum_erythrophyllum\nbutton_tree, button_mangrove, Conocarpus_erectus\nwhite_mangrove, Laguncularia_racemosa\noleaster\nwater_milfoil\nanchovy_pear, anchovy_pear_tree, Grias_cauliflora\nbrazil_nut, brazil-nut_tree, Bertholletia_excelsa\nloosestrife\npurple_loosestrife, spiked_loosestrife, Lythrum_salicaria\ngrass_poly, hyssop_loosestrife, Lythrum_hyssopifolia\ncrape_myrtle, crepe_myrtle, crepe_flower, Lagerstroemia_indica\nQueen's_crape_myrtle, pride-of-India, Lagerstroemia_speciosa\nmyrtaceous_tree\nmyrtle\ncommon_myrtle, Myrtus_communis\nbayberry, bay-rum_tree, Jamaica_bayberry, wild_cinnamon, Pimenta_acris\nallspice, allspice_tree, pimento_tree, Pimenta_dioica\nallspice_tree, Pimenta_officinalis\nsour_cherry, Eugenia_corynantha\nnakedwood, Eugenia_dicrana\nSurinam_cherry, pitanga, Eugenia_uniflora\nrose_apple, rose-apple_tree, jambosa, Eugenia_jambos\nfeijoa, feijoa_bush\njaboticaba, jaboticaba_tree, Myrciaria_cauliflora\nguava, true_guava, guava_bush, Psidium_guajava\nguava, strawberry_guava, yellow_cattley_guava, Psidium_littorale\ncattley_guava, purple_strawberry_guava, Psidium_cattleianum, Psidium_littorale_longipes\nBrazilian_guava, Psidium_guineense\ngum_tree, gum\neucalyptus, eucalypt, eucalyptus_tree\nflooded_gum\nmallee\nstringybark\nsmoothbark\nred_gum, peppermint, peppermint_gum, Eucalyptus_amygdalina\nred_gum, marri, Eucalyptus_calophylla\nriver_red_gum, river_gum, Eucalyptus_camaldulensis, Eucalyptus_rostrata\nmountain_swamp_gum, Eucalyptus_camphora\nsnow_gum, ghost_gum, white_ash, Eucalyptus_coriacea, Eucalyptus_pauciflora\nalpine_ash, mountain_oak, Eucalyptus_delegatensis\nwhite_mallee, congoo_mallee, Eucalyptus_dumosa\nwhite_stringybark, thin-leaved_stringybark, Eucalyptusd_eugenioides\nwhite_mountain_ash, Eucalyptus_fraxinoides\nblue_gum, 
fever_tree, Eucalyptus_globulus\nrose_gum, Eucalypt_grandis\ncider_gum, Eucalypt_gunnii\nswamp_gum, Eucalypt_ovata\nspotted_gum, Eucalyptus_maculata\nlemon-scented_gum, Eucalyptus_citriodora, Eucalyptus_maculata_citriodora\nblack_mallee, black_sally, black_gum, Eucalytus_stellulata\nforest_red_gum, Eucalypt_tereticornis\nmountain_ash, Eucalyptus_regnans\nmanna_gum, Eucalyptus_viminalis\nclove, clove_tree, Syzygium_aromaticum, Eugenia_aromaticum, Eugenia_caryophyllatum\nclove\ntupelo, tupelo_tree\nwater_gum, Nyssa_aquatica\nsour_gum, black_gum, pepperidge, Nyssa_sylvatica\nenchanter's_nightshade\nCircaea_lutetiana\nwillowherb\nfireweed, giant_willowherb, rosebay_willowherb, wickup, Epilobium_angustifolium\nCalifornia_fuchsia, humming_bird's_trumpet, Epilobium_canum_canum, Zauschneria_californica\nfuchsia\nlady's-eardrop, ladies'-eardrop, lady's-eardrops, ladies'-eardrops, Fuchsia_coccinea\nevening_primrose\ncommon_evening_primrose, German_rampion, Oenothera_biennis\nsundrops, Oenothera_fruticosa\nMissouri_primrose, Ozark_sundrops, Oenothera_macrocarpa\npomegranate, pomegranate_tree, Punica_granatum\nmangrove, Rhizophora_mangle\ndaphne\ngarland_flower, Daphne_cneorum\nspurge_laurel, wood_laurel, Daphne_laureola\nmezereon, February_daphne, Daphne_mezereum\nIndian_rhododendron, Melastoma_malabathricum\nMedinilla_magnifica\ndeer_grass, meadow_beauty\ncanna\nachira, indian_shot, arrowroot, Canna_indica, Canna_edulis\narrowroot, American_arrowroot, obedience_plant, Maranta_arundinaceae\nbanana, banana_tree\ndwarf_banana, Musa_acuminata\nJapanese_banana, Musa_basjoo\nplantain, plantain_tree, Musa_paradisiaca\nedible_banana, Musa_paradisiaca_sapientum\nabaca, Manila_hemp, Musa_textilis\nAbyssinian_banana, Ethiopian_banana, Ensete_ventricosum, Musa_ensete\nginger\ncommon_ginger, Canton_ginger, stem_ginger, Zingiber_officinale\nturmeric, Curcuma_longa, Curcuma_domestica\ngalangal, Alpinia_galanga\nshellflower, shall-flower, shell_ginger, Alpinia_Zerumbet, Alpinia_speciosa, 
Languas_speciosa\ngrains_of_paradise, Guinea_grains, Guinea_pepper, melagueta_pepper, Aframomum_melegueta\ncardamom, cardamon, Elettaria_cardamomum\nbegonia\nfibrous-rooted_begonia\ntuberous_begonia\nrhizomatous_begonia\nChristmas_begonia, blooming-fool_begonia, Begonia_cheimantha\nangel-wing_begonia, Begonia_cocchinea\nbeefsteak_begonia, kidney_begonia, Begonia_erythrophylla, Begonia_feastii\nstar_begonia, star-leaf_begonia, Begonia_heracleifolia\nrex_begonia, king_begonia, painted-leaf_begonia, beefsteak_geranium, Begonia_rex\nwax_begonia, Begonia_semperflorens\nSocotra_begonia, Begonia_socotrana\nhybrid_tuberous_begonia, Begonia_tuberhybrida\ndillenia\nguinea_gold_vine, guinea_flower\npoon\ncalaba, Santa_Maria_tree, Calophyllum_calaba\nMaria, Calophyllum_longifolium\nlaurelwood, lancewood_tree, Calophyllum_candidissimum\nAlexandrian_laurel, Calophyllum_inophyllum\nclusia\nwild_fig, Clusia_flava\nwaxflower, Clusia_insignis\npitch_apple, strangler_fig, Clusia_rosea, Clusia_major\nmangosteen, mangosteen_tree, Garcinia_mangostana\ngamboge_tree, Garcinia_hanburyi, Garcinia_cambogia, Garcinia_gummi-gutta\nSt_John's_wort\ncommon_St_John's_wort, tutsan, Hypericum_androsaemum\ngreat_St_John's_wort, Hypericum_ascyron, Hypericum_pyramidatum\ncreeping_St_John's_wort, Hypericum_calycinum\nlow_St_Andrew's_cross, Hypericum_hypericoides\nklammath_weed, Hypericum_perforatum\nshrubby_St_John's_wort, Hypericum_prolificum, Hypericum_spathulatum\nSt_Peter's_wort, Hypericum_tetrapterum, Hypericum_maculatum\nmarsh_St-John's_wort, Hypericum_virginianum\nmammee_apple, mammee, mamey, mammee_tree, Mammea_americana\nrose_chestnut, ironwood, ironwood_tree, Mesua_ferrea\nbower_actinidia, tara_vine, Actinidia_arguta\nChinese_gooseberry, kiwi, kiwi_vine, Actinidia_chinensis, Actinidia_deliciosa\nsilvervine, silver_vine, Actinidia_polygama\nwild_cinnamon, white_cinnamon_tree, Canella_winterana, Canella-alba\npapaya, papaia, pawpaw, papaya_tree, melon_tree, Carica_papaya\nsouari, souari_nut, 
souari_tree, Caryocar_nuciferum\nrockrose, rock_rose\nwhite-leaved_rockrose, Cistus_albidus\ncommon_gum_cistus, Cistus_ladanifer, Cistus_ladanum\nfrostweed, frost-weed, frostwort, Helianthemum_canadense, Crocanthemum_canadense\ndipterocarp\nred_lauan, red_lauan_tree, Shorea_teysmanniana\ngovernor's_plum, governor_plum, Madagascar_plum, ramontchi, batoko_palm, Flacourtia_indica\nkei_apple, kei_apple_bush, Dovyalis_caffra\nketembilla, kitembilla, kitambilla, ketembilla_tree, Ceylon_gooseberry, Dovyalis_hebecarpa\nchaulmoogra, chaulmoogra_tree, chaulmugra, Hydnocarpus_kurzii, Taraktagenos_kurzii, Taraktogenos_kurzii\nwild_peach, Kiggelaria_africana\ncandlewood\nboojum_tree, cirio, Fouquieria_columnaris, Idria_columnaris\nbird's-eye_bush, Ochna_serrulata\ngranadilla, purple_granadillo, Passiflora_edulis\ngranadilla, sweet_granadilla, Passiflora_ligularis\ngranadilla, giant_granadilla, Passiflora_quadrangularis\nmaypop, Passiflora_incarnata\nJamaica_honeysuckle, yellow_granadilla, Passiflora_laurifolia\nbanana_passion_fruit, Passiflora_mollissima\nsweet_calabash, Passiflora_maliformis\nlove-in-a-mist, running_pop, wild_water_lemon, Passiflora_foetida\nreseda\nmignonette, sweet_reseda, Reseda_odorata\ndyer's_rocket, dyer's_mignonette, weld, Reseda_luteola\nfalse_tamarisk, German_tamarisk, Myricaria_germanica\nhalophyte\nviola\nviolet\nfield_pansy, heartsease, Viola_arvensis\nAmerican_dog_violet, Viola_conspersa\ndog_violet, heath_violet, Viola_canina\nhorned_violet, tufted_pansy, Viola_cornuta\ntwo-eyed_violet, heartsease, Viola_ocellata\nbird's-foot_violet, pansy_violet, Johnny-jump-up, wood_violet, Viola_pedata\ndowny_yellow_violet, Viola_pubescens\nlong-spurred_violet, Viola_rostrata\npale_violet, striped_violet, cream_violet, Viola_striata\nhedge_violet, wood_violet, Viola_sylvatica, Viola_reichenbachiana\nnettle\nstinging_nettle, Urtica_dioica\nRoman_nettle, Urtica_pipulifera\nramie, ramee, Chinese_silk_plant, China_grass, Boehmeria_nivea\nwood_nettle, 
Laportea_canadensis\nAustralian_nettle, Australian_nettle_tree\npellitory-of-the-wall, wall_pellitory, pellitory, Parietaria_difussa\nrichweed, clearweed, dead_nettle, Pilea_pumilla\nartillery_plant, Pilea_microphylla\nfriendship_plant, panamica, panamiga, Pilea_involucrata\nQueensland_grass-cloth_plant, Pipturus_argenteus\nPipturus_albidus\ncannabis, hemp\nIndian_hemp, Cannabis_indica\nmulberry, mulberry_tree\nwhite_mulberry, Morus_alba\nblack_mulberry, Morus_nigra\nred_mulberry, Morus_rubra\nosage_orange, bow_wood, mock_orange, Maclura_pomifera\nbreadfruit, breadfruit_tree, Artocarpus_communis, Artocarpus_altilis\njackfruit, jackfruit_tree, Artocarpus_heterophyllus\nmarang, marang_tree, Artocarpus_odoratissima\nfig_tree\nfig, common_fig, common_fig_tree, Ficus_carica\ncaprifig, Ficus_carica_sylvestris\ngolden_fig, Florida_strangler_fig, strangler_fig, wild_fig, Ficus_aurea\nbanyan, banyan_tree, banian, banian_tree, Indian_banyan, East_Indian_fig_tree, Ficus_bengalensis\npipal, pipal_tree, pipul, peepul, sacred_fig, bo_tree, Ficus_religiosa\nIndia-rubber_tree, India-rubber_plant, India-rubber_fig, rubber_plant, Assam_rubber, Ficus_elastica\nmistletoe_fig, mistletoe_rubber_plant, Ficus_diversifolia, Ficus_deltoidea\nPort_Jackson_fig, rusty_rig, little-leaf_fig, Botany_Bay_fig, Ficus_rubiginosa\nsycamore, sycamore_fig, mulberry_fig, Ficus_sycomorus\npaper_mulberry, Broussonetia_papyrifera\ntrumpetwood, trumpet-wood, trumpet_tree, snake_wood, imbauba, Cecropia_peltata\nelm, elm_tree\nwinged_elm, wing_elm, Ulmus_alata\nAmerican_elm, white_elm, water_elm, rock_elm, Ulmus_americana\nsmooth-leaved_elm, European_field_elm, Ulmus_carpinifolia\ncedar_elm, Ulmus_crassifolia\nwitch_elm, wych_elm, Ulmus_glabra\nDutch_elm, Ulmus_hollandica\nHuntingdon_elm, Ulmus_hollandica_vegetata\nwater_elm, Ulmus_laevis\nChinese_elm, Ulmus_parvifolia\nEnglish_elm, European_elm, Ulmus_procera\nSiberian_elm, Chinese_elm, dwarf_elm, Ulmus_pumila\nslippery_elm, red_elm, Ulmus_rubra\nJersey_elm, 
guernsey_elm, wheately_elm, Ulmus_sarniensis, Ulmus_campestris_sarniensis, Ulmus_campestris_wheatleyi\nSeptember_elm, red_elm, Ulmus_serotina\nrock_elm, Ulmus_thomasii\nhackberry, nettle_tree\nEuropean_hackberry, Mediterranean_hackberry, Celtis_australis\nAmerican_hackberry, Celtis_occidentalis\nsugarberry, Celtis_laevigata\niridaceous_plant\nbearded_iris\nbeardless_iris\norrisroot, orris\ndwarf_iris, Iris_cristata\nDutch_iris, Iris_filifolia\nFlorentine_iris, orris, Iris_germanica_florentina, Iris_florentina\nstinking_iris, gladdon, gladdon_iris, stinking_gladwyn, roast_beef_plant, Iris_foetidissima\nGerman_iris, Iris_germanica\nJapanese_iris, Iris_kaempferi\nGerman_iris, Iris_kochii\nDalmatian_iris, Iris_pallida\nPersian_iris, Iris_persica\nDutch_iris, Iris_tingitana\ndwarf_iris, vernal_iris, Iris_verna\nSpanish_iris, xiphium_iris, Iris_xiphium\nblackberry-lily, leopard_lily, Belamcanda_chinensis\ncrocus\nsaffron, saffron_crocus, Crocus_sativus\ncorn_lily\nblue-eyed_grass\nwandflower, Sparaxis_tricolor\namaryllis\nsalsilla, Bomarea_edulis\nsalsilla, Bomarea_salsilla\nblood_lily\nCape_tulip, Haemanthus_coccineus\nhippeastrum, Hippeastrum_puniceum\nnarcissus\ndaffodil, Narcissus_pseudonarcissus\njonquil, Narcissus_jonquilla\njonquil\nJacobean_lily, Aztec_lily, Strekelia_formosissima\nliliaceous_plant\nmountain_lily, Lilium_auratum\nCanada_lily, wild_yellow_lily, meadow_lily, wild_meadow_lily, Lilium_canadense\ntiger_lily, leopard_lily, pine_lily, Lilium_catesbaei\nColumbia_tiger_lily, Oregon_lily, Lilium_columbianum\ntiger_lily, devil_lily, kentan, Lilium_lancifolium\nEaster_lily, Bermuda_lily, white_trumpet_lily, Lilium_longiflorum\ncoast_lily, Lilium_maritinum\nTurk's-cap, martagon, Lilium_martagon\nMichigan_lily, Lilium_michiganense\nleopard_lily, panther_lily, Lilium_pardalinum\nTurk's-cap, Turk's_cap-lily, Lilium_superbum\nAfrican_lily, African_tulip, blue_African_lily, Agapanthus_africanus\ncolicroot, colic_root, crow_corn, star_grass, 
unicorn_root\nague_root, ague_grass, Aletris_farinosa\nyellow_colicroot, Aletris_aurea\nalliaceous_plant\nHooker's_onion, Allium_acuminatum\nwild_leek, Levant_garlic, kurrat, Allium_ampeloprasum\nCanada_garlic, meadow_leek, rose_leek, Allium_canadense\nkeeled_garlic, Allium_carinatum\nonion\nshallot, eschalot, multiplier_onion, Allium_cepa_aggregatum, Allium_ascalonicum\nnodding_onion, nodding_wild_onion, lady's_leek, Allium_cernuum\nWelsh_onion, Japanese_leek, Allium_fistulosum\nred-skinned_onion, Allium_haematochiton\ndaffodil_garlic, flowering_onion, Naples_garlic, Allium_neopolitanum\nfew-flowered_leek, Allium_paradoxum\ngarlic, Allium_sativum\nsand_leek, giant_garlic, Spanish_garlic, rocambole, Allium_scorodoprasum\nchives, chive, cive, schnittlaugh, Allium_schoenoprasum\ncrow_garlic, false_garlic, field_garlic, stag's_garlic, wild_garlic, Allium_vineale\nwild_garlic, wood_garlic, Ramsons, Allium_ursinum\ngarlic_chive, Chinese_chive, Oriental_garlic, Allium_tuberosum\nround-headed_leek, Allium_sphaerocephalum\nthree-cornered_leek, triquetrous_leek, Allium_triquetrum\ncape_aloe, Aloe_ferox\nkniphofia, tritoma, flame_flower, flame-flower, flameflower\npoker_plant, Kniphofia_uvaria\nred-hot_poker, Kniphofia_praecox\nfly_poison, Amianthum_muscaetoxicum, Amianthum_muscitoxicum\namber_lily, Anthericum_torreyi\nasparagus, edible_asparagus, Asparagus_officinales\nasparagus_fern, Asparagus_setaceous, Asparagus_plumosus\nsmilax, Asparagus_asparagoides\nasphodel\nJacob's_rod\naspidistra, cast-iron_plant, bar-room_plant, Aspidistra_elatio\ncoral_drops, Bessera_elegans\nChristmas_bells\nclimbing_onion, Bowiea_volubilis\nmariposa, mariposa_tulip, mariposa_lily\nglobe_lily, fairy_lantern\ncat's-ear\nwhite_globe_lily, white_fairy_lantern, Calochortus_albus\nyellow_globe_lily, golden_fairy_lantern, Calochortus_amabilis\nrose_globe_lily, Calochortus_amoenus\nstar_tulip, elegant_cat's_ears, Calochortus_elegans\ndesert_mariposa_tulip, Calochortus_kennedyi\nyellow_mariposa_tulip, 
Calochortus_luteus\nsagebrush_mariposa_tulip, Calochortus_macrocarpus\nsego_lily, Calochortus_nuttallii\ncamas, camass, quamash, camosh, camash\ncommon_camas, Camassia_quamash\nLeichtlin's_camas, Camassia_leichtlinii\nwild_hyacinth, indigo_squill, Camassia_scilloides\ndogtooth_violet, dogtooth, dog's-tooth_violet\nwhite_dogtooth_violet, white_dog's-tooth_violet, blonde_lilian, Erythronium_albidum\nyellow_adder's_tongue, trout_lily, amberbell, Erythronium_americanum\nEuropean_dogtooth, Erythronium_dens-canis\nfawn_lily, Erythronium_californicum\nglacier_lily, snow_lily, Erythronium_grandiflorum\navalanche_lily, Erythronium_montanum\nfritillary, checkered_lily\nmission_bells, rice-grain_fritillary, Fritillaria_affinis, Fritillaria_lanceolata, Fritillaria_mutica\nmission_bells, black_fritillary, Fritillaria_biflora\nstink_bell, Fritillaria_agrestis\ncrown_imperial, Fritillaria_imperialis\nwhite_fritillary, Fritillaria_liliaceae\nsnake's_head_fritillary, guinea-hen_flower, checkered_daffodil, leper_lily, Fritillaria_meleagris\nadobe_lily, pink_fritillary, Fritillaria_pluriflora\nscarlet_fritillary, Fritillaria_recurva\ntulip\ndwarf_tulip, Tulipa_armena, Tulipa_suaveolens\nlady_tulip, candlestick_tulip, Tulipa_clusiana\nTulipa_gesneriana\ncottage_tulip\nDarwin_tulip\ngloriosa, glory_lily, climbing_lily, creeping_lily, Gloriosa_superba\nlemon_lily, Hemerocallis_lilio-asphodelus, Hemerocallis_flava\ncommon_hyacinth, Hyacinthus_orientalis\nRoman_hyacinth, Hyacinthus_orientalis_albulus\nsummer_hyacinth, cape_hyacinth, Hyacinthus_candicans, Galtonia_candicans\nstar-of-Bethlehem\nbath_asparagus, Prussian_asparagus, Ornithogalum_pyrenaicum\ngrape_hyacinth\ncommon_grape_hyacinth, Muscari_neglectum\ntassel_hyacinth, Muscari_comosum\nscilla, squill\nspring_squill, Scilla_verna, sea_onion\nfalse_asphodel\nScotch_asphodel, Tofieldia_pusilla\nsea_squill, sea_onion, squill, Urginea_maritima\nsquill\nbutcher's_broom, Ruscus_aculeatus\nbog_asphodel\nEuropean_bog_asphodel, 
Narthecium_ossifragum\nAmerican_bog_asphodel, Narthecium_americanum\nhellebore, false_hellebore\nwhite_hellebore, American_hellebore, Indian_poke, bugbane, Veratrum_viride\nsquaw_grass, bear_grass, Xerophyllum_tenax\ndeath_camas, zigadene\nalkali_grass, Zigadenus_elegans\nwhite_camas, Zigadenus_glaucus\npoison_camas, Zigadenus_nuttalli\ngrassy_death_camas, Zigadenus_venenosus, Zigadenus_venenosus_gramineus\nprairie_wake-robin, prairie_trillium, Trillium_recurvatum\ndwarf-white_trillium, snow_trillium, early_wake-robin\nherb_Paris, Paris_quadrifolia\nsarsaparilla\nbullbrier, greenbrier, catbrier, horse_brier, horse-brier, brier, briar, Smilax_rotundifolia\nrough_bindweed, Smilax_aspera\nclintonia, Clinton's_lily\nfalse_lily_of_the_valley, Maianthemum_canadense\nfalse_lily_of_the_valley, Maianthemum_bifolium\nSolomon's-seal\ngreat_Solomon's-seal, Polygonatum_biflorum, Polygonatum_commutatum\nbellwort, merry_bells, wild_oats\nstrawflower, cornflower, Uvularia_grandiflora\npia, Indian_arrowroot, Tacca_leontopetaloides, Tacca_pinnatifida\nagave, century_plant, American_aloe\nAmerican_agave, Agave_americana\nsisal, Agave_sisalana\nmaguey, cantala, Agave_cantala\nmaguey, Agave_atrovirens\nAgave_tequilana\ncabbage_tree, grass_tree, Cordyline_australis\ndracaena\ntuberose, Polianthes_tuberosa\nsansevieria, bowstring_hemp\nAfrican_bowstring_hemp, African_hemp, Sansevieria_guineensis\nCeylon_bowstring_hemp, Sansevieria_zeylanica\nmother-in-law's_tongue, snake_plant, Sansevieria_trifasciata\nSpanish_bayonet, Yucca_aloifolia\nSpanish_bayonet, Yucca_baccata\nJoshua_tree, Yucca_brevifolia\nsoapweed, soap-weed, soap_tree, Yucca_elata\nAdam's_needle, Adam's_needle-and-thread, spoonleaf_yucca, needle_palm, Yucca_filamentosa\nbear_grass, Yucca_glauca\nSpanish_dagger, Yucca_gloriosa\nOur_Lord's_candle, Yucca_whipplei\nwater_shamrock, buckbean, bogbean, bog_myrtle, marsh_trefoil, Menyanthes_trifoliata\nbutterfly_bush, buddleia\nyellow_jasmine, yellow_jessamine, Carolina_jasmine, 
evening_trumpet_flower, Gelsemium_sempervirens\nflax\ncalabar_bean, ordeal_bean\nbonduc, bonduc_tree, Caesalpinia_bonduc, Caesalpinia_bonducella\ndivi-divi, Caesalpinia_coriaria\nMysore_thorn, Caesalpinia_decapetala, Caesalpinia_sepiaria\nbrazilian_ironwood, Caesalpinia_ferrea\nbird_of_paradise, poinciana, Caesalpinia_gilliesii, Poinciana_gilliesii\nshingle_tree, Acrocarpus_fraxinifolius\nmountain_ebony, orchid_tree, Bauhinia_variegata\nmsasa, Brachystegia_speciformis\ncassia\ngolden_shower_tree, drumstick_tree, purging_cassia, pudding_pipe_tree, canafistola, canafistula, Cassia_fistula\npink_shower, pink_shower_tree, horse_cassia, Cassia_grandis\nrainbow_shower, Cassia_javonica\nhorse_cassia, Cassia_roxburghii, Cassia_marginata\ncarob, carob_tree, carob_bean_tree, algarroba, Ceratonia_siliqua\ncarob, carob_bean, algarroba_bean, algarroba, locust_bean, locust_pod\npaloverde\nroyal_poinciana, flamboyant, flame_tree, peacock_flower, Delonix_regia, Poinciana_regia\nlocust_tree, locust\nwater_locust, swamp_locust, Gleditsia_aquatica\nhoney_locust, Gleditsia_triacanthos\nKentucky_coffee_tree, bonduc, chicot, Gymnocladus_dioica\nlogwood, logwood_tree, campeachy, bloodwood_tree, Haematoxylum_campechianum\nJerusalem_thorn, horsebean, Parkinsonia_aculeata\npalo_verde, Parkinsonia_florida, Cercidium_floridum\nDalmatian_laburnum, Petteria_ramentacea, Cytisus_ramentaceus\nsenna\navaram, tanner's_cassia, Senna_auriculata, Cassia_auriculata\nAlexandria_senna, Alexandrian_senna, true_senna, tinnevelly_senna, Indian_senna, Senna_alexandrina, Cassia_acutifolia, Cassia_augustifolia\nwild_senna, Senna_marilandica, Cassia_marilandica\nsicklepod, Senna_obtusifolia, Cassia_tora\ncoffee_senna, mogdad_coffee, styptic_weed, stinking_weed, Senna_occidentalis, Cassia_occidentalis\ntamarind, tamarind_tree, tamarindo, Tamarindus_indica\nfalse_indigo, bastard_indigo, Amorpha_californica\nfalse_indigo, bastard_indigo, Amorpha_fruticosa\nhog_peanut, wild_peanut, Amphicarpaea_bracteata, 
Amphicarpa_bracteata\nangelim, andelmin\ncabbage_bark, cabbage-bark_tree, cabbage_tree, Andira_inermis\nkidney_vetch, Anthyllis_vulneraria\ngroundnut, groundnut_vine, Indian_potato, potato_bean, wild_bean, Apios_americana, Apios_tuberosa\nrooibos, Aspalathus_linearis, Aspalathus_cedcarbergensis\nmilk_vetch, milk-vetch\nalpine_milk_vetch, Astragalus_alpinus\npurple_milk_vetch, Astragalus_danicus\ncamwood, African_sandalwood, Baphia_nitida\nwild_indigo, false_indigo\nblue_false_indigo, Baptisia_australis\nwhite_false_indigo, Baptisia_lactea\nindigo_broom, horsefly_weed, rattle_weed, Baptisia_tinctoria\ndhak, dak, palas, Butea_frondosa, Butea_monosperma\npigeon_pea, pigeon-pea_plant, cajan_pea, catjang_pea, red_gram, dhal, dahl, Cajanus_cajan\nsword_bean, Canavalia_gladiata\npea_tree, caragana\nSiberian_pea_tree, Caragana_arborescens\nChinese_pea_tree, Caragana_sinica\nMoreton_Bay_chestnut, Australian_chestnut\nbutterfly_pea, Centrosema_virginianum\nJudas_tree, love_tree, Circis_siliquastrum\nredbud, Cercis_canadensis\nwestern_redbud, California_redbud, Cercis_occidentalis\ntagasaste, Chamaecytisus_palmensis, Cytesis_proliferus\nweeping_tree_broom\nflame_pea\nchickpea, chickpea_plant, Egyptian_pea, Cicer_arietinum\nchickpea, garbanzo\nKentucky_yellowwood, gopherwood, Cladrastis_lutea, Cladrastis_kentukea\nglory_pea, clianthus\ndesert_pea, Sturt_pea, Sturt's_desert_pea, Clianthus_formosus, Clianthus_speciosus\nparrot's_beak, parrot's_bill, Clianthus_puniceus\nbutterfly_pea, Clitoria_mariana\nblue_pea, butterfly_pea, Clitoria_turnatea\ntelegraph_plant, semaphore_plant, Codariocalyx_motorius, Desmodium_motorium, Desmodium_gyrans\nbladder_senna, Colutea_arborescens\naxseed, crown_vetch, Coronilla_varia\ncrotalaria, rattlebox\nguar, cluster_bean, Cyamopsis_tetragonolobus, Cyamopsis_psoraloides\nwhite_broom, white_Spanish_broom, Cytisus_albus, Cytisus_multiflorus\ncommon_broom, Scotch_broom, green_broom, Cytisus_scoparius\nrosewood, rosewood_tree\nIndian_blackwood, 
East_Indian_rosewood, East_India_rosewood, Indian_rosewood, Dalbergia_latifolia\nsissoo, sissu, sisham, Dalbergia_sissoo\nkingwood, kingwood_tree, Dalbergia_cearensis\nBrazilian_rosewood, caviuna_wood, jacaranda, Dalbergia_nigra\ncocobolo, Dalbergia_retusa\nblackwood, blackwood_tree\nbitter_pea\nderris\nderris_root, tuba_root, Derris_elliptica\nprairie_mimosa, prickle-weed, Desmanthus_ilinoensis\ntick_trefoil, beggar_lice, beggar's_lice\nbeggarweed, Desmodium_tortuosum, Desmodium_purpureum\nAustralian_pea, Dipogon_lignosus, Dolichos_lignosus\ncoral_tree, erythrina\nkaffir_boom, Cape_kafferboom, Erythrina_caffra\ncoral_bean_tree, Erythrina_corallodendrum\nceibo, crybaby_tree, cry-baby_tree, common_coral_tree, Erythrina_crista-galli\nkaffir_boom, Transvaal_kafferboom, Erythrina_lysistemon\nIndian_coral_tree, Erythrina_variegata, Erythrina_Indica\ncork_tree, Erythrina_vespertilio\ngoat's_rue, goat_rue, Galega_officinalis\npoison_bush, poison_pea, gastrolobium\nSpanish_broom, Spanish_gorse, Genista_hispanica\nwoodwaxen, dyer's_greenweed, dyer's-broom, dyeweed, greenweed, whin, woadwaxen, Genista_tinctoria\nchanar, chanal, Geoffroea_decorticans\ngliricidia\nsoy, soybean, soya_bean\nlicorice, liquorice, Glycyrrhiza_glabra\nwild_licorice, wild_liquorice, American_licorice, American_liquorice, Glycyrrhiza_lepidota\nlicorice_root\nWestern_Australia_coral_pea, Hardenbergia_comnptoniana\nsweet_vetch, Hedysarum_boreale\nFrench_honeysuckle, sulla, Hedysarum_coronarium\nanil, Indigofera_suffruticosa, Indigofera_anil\nscarlet_runner, running_postman, Kennedia_prostrata\nhyacinth_bean, bonavist, Indian_bean, Egyptian_bean, Lablab_purpureus, Dolichos_lablab\nScotch_laburnum, Alpine_golden_chain, Laburnum_alpinum\nvetchling\nwild_pea\neverlasting_pea\nbeach_pea, sea_pea, Lathyrus_maritimus, Lathyrus_japonicus\ngrass_vetch, grass_vetchling, Lathyrus_nissolia\nmarsh_pea, Lathyrus_palustris\ncommon_vetchling, meadow_pea, yellow_vetchling, Lathyrus_pratensis\ngrass_pea, Indian_pea, 
khesari, Lathyrus_sativus\nTangier_pea, Tangier_peavine, Lalthyrus_tingitanus\nheath_pea, earth-nut_pea, earthnut_pea, tuberous_vetch, Lathyrus_tuberosus\nbicolor_lespediza, ezo-yama-hagi, Lespedeza_bicolor\njapanese_clover, japan_clover, jap_clover, Lespedeza_striata\nKorean_lespedeza, Lespedeza_stipulacea\nsericea_lespedeza, Lespedeza_sericea, Lespedeza_cuneata\nlentil, lentil_plant, Lens_culinaris\nlentil\nprairie_bird's-foot_trefoil, compass_plant, prairie_lotus, prairie_trefoil, Lotus_americanus\nbird's_foot_trefoil, bird's_foot_clover, babies'_slippers, bacon_and_eggs, Lotus_corniculatus\nwinged_pea, asparagus_pea, Lotus_tetragonolobus\nlupine, lupin\nwhite_lupine, field_lupine, wolf_bean, Egyptian_lupine, Lupinus_albus\ntree_lupine, Lupinus_arboreus\nwild_lupine, sundial_lupine, Indian_beet, old-maid's_bonnet, Lupinus_perennis\nbluebonnet, buffalo_clover, Texas_bluebonnet, Lupinus_subcarnosus\nTexas_bluebonnet, Lupinus_texensis\nmedic, medick, trefoil\nmoon_trefoil, Medicago_arborea\nsickle_alfalfa, sickle_lucerne, sickle_medick, Medicago_falcata\nCalvary_clover, Medicago_intertexta, Medicago_echinus\nblack_medick, hop_clover, yellow_trefoil, nonesuch_clover, Medicago_lupulina\nalfalfa, lucerne, Medicago_sativa\nmillettia\nmucuna\ncowage, velvet_bean, Bengal_bean, Benghal_bean, Florida_bean, Mucuna_pruriens_utilis, Mucuna_deeringiana, Mucuna_aterrima, Stizolobium_deeringiana\ntolu_tree, tolu_balsam_tree, Myroxylon_balsamum, Myroxylon_toluiferum\nPeruvian_balsam, Myroxylon_pereirae, Myroxylon_balsamum_pereirae\nsainfoin, sanfoin, holy_clover, esparcet, Onobrychis_viciifolia, Onobrychis_viciaefolia\nrestharrow, rest-harrow, Ononis_repens\nbead_tree, jumby_bean, jumby_tree, Ormosia_monosperma\njumby_bead, jumbie_bead, Ormosia_coarctata\nlocoweed, crazyweed, crazy_weed\npurple_locoweed, purple_loco, Oxytropis_lambertii\ntumbleweed\nyam_bean, Pachyrhizus_erosus\nshamrock_pea, Parochetus_communis\npole_bean\nkidney_bean, frijol, 
frijole\nharicot\nwax_bean\nscarlet_runner, scarlet_runner_bean, Dutch_case-knife_bean, runner_bean, Phaseolus_coccineus, Phaseolus_multiflorus\nlima_bean, lima_bean_plant, Phaseolus_limensis\nsieva_bean, butter_bean, butter-bean_plant, lima_bean, Phaseolus_lunatus\ntepary_bean, Phaseolus_acutifolius_latifolius\nchaparral_pea, stingaree-bush, Pickeringia_montana\nJamaica_dogwood, fish_fuddle, Piscidia_piscipula, Piscidia_erythrina\npea\ngarden_pea\nedible-pod_pea, edible-podded_pea, Pisum_sativum_macrocarpon\nsugar_snap_pea, snap_pea\nfield_pea, field-pea_plant, Austrian_winter_pea, Pisum_sativum_arvense, Pisum_arvense\nfield_pea\ncommon_flat_pea, native_holly, Playlobium_obtusangulum\nquira\nroble, Platymiscium_trinitatis\nPanama_redwood_tree, Panama_redwood, Platymiscium_pinnatum\nIndian_beech, Pongamia_glabra\nwinged_bean, winged_pea, goa_bean, goa_bean_vine, Manila_bean, Psophocarpus_tetragonolobus\nbreadroot, Indian_breadroot, pomme_blanche, pomme_de_prairie, Psoralea_esculenta\nbloodwood_tree, kiaat, Pterocarpus_angolensis\nkino, Pterocarpus_marsupium\nred_sandalwood, red_sanders, red_sanderswood, red_saunders, Pterocarpus_santalinus\nkudzu, kudzu_vine, Pueraria_lobata\nbristly_locust, rose_acacia, moss_locust, Robinia_hispida\nblack_locust, yellow_locust, Robinia_pseudoacacia\nclammy_locust, Robinia_viscosa\ncarib_wood, Sabinea_carinalis\nColorado_River_hemp, Sesbania_exaltata\nscarlet_wisteria_tree, vegetable_hummingbird, Sesbania_grandiflora\nJapanese_pagoda_tree, Chinese_scholartree, Chinese_scholar_tree, Sophora_japonica, Sophora_sinensis\nmescal_bean, coral_bean, frijolito, frijolillo, Sophora_secundiflora\nkowhai, Sophora_tetraptera\njade_vine, emerald_creeper, Strongylodon_macrobotrys\nhoary_pea\nbastard_indigo, Tephrosia_purpurea\ncatgut, goat's_rue, wild_sweet_pea, Tephrosia_virginiana\nbush_pea\nfalse_lupine, golden_pea, yellow_pea, Thermopsis_macrophylla\nCarolina_lupine, Thermopsis_villosa\ntipu, tipu_tree, yellow_jacaranda, 
pride_of_Bolivia\nbird's_foot_trefoil, Trigonella_ornithopodioides\nfenugreek, Greek_clover, Trigonella_foenumgraecum\ngorse, furze, whin, Irish_gorse, Ulex_europaeus\nvetch\ntufted_vetch, bird_vetch, Calnada_pea, Vicia_cracca\nbroad_bean, fava_bean, horsebean\nbitter_betch, Vicia_orobus\nbush_vetch, Vicia_sepium\nmoth_bean, Vigna_aconitifolia, Phaseolus_aconitifolius\nsnailflower, snail-flower, snail_flower, snail_bean, corkscrew_flower, Vigna_caracalla, Phaseolus_caracalla\nmung, mung_bean, green_gram, golden_gram, Vigna_radiata, Phaseolus_aureus\ncowpea, cowpea_plant, black-eyed_pea, Vigna_unguiculata, Vigna_sinensis\ncowpea, black-eyed_pea\nasparagus_bean, yard-long_bean, Vigna_unguiculata_sesquipedalis, Vigna_sesquipedalis\nswamp_oak, Viminaria_juncea, Viminaria_denudata\nkeurboom, Virgilia_capensis, Virgilia_oroboides\nkeurboom, Virgilia_divaricata\nJapanese_wistaria, Wisteria_floribunda\nChinese_wistaria, Wisteria_chinensis\nAmerican_wistaria, American_wisteria, Wisteria_frutescens\nsilky_wisteria, Wisteria_venusta\npalm, palm_tree\nsago_palm\nfeather_palm\nfan_palm\npalmetto\ncoyol, coyol_palm, Acrocomia_vinifera\ngrugru, gri-gri, grugru_palm, macamba, Acrocomia_aculeata\nareca\nbetel_palm, Areca_catechu\nsugar_palm, gomuti, gomuti_palm, Arenga_pinnata\npiassava_palm, pissaba_palm, Bahia_piassava, bahia_coquilla, Attalea_funifera\ncoquilla_nut\npalmyra, palmyra_palm, toddy_palm, wine_palm, lontar, longar_palm, Borassus_flabellifer\ncalamus\nrattan, rattan_palm, Calamus_rotang\nlawyer_cane, Calamus_australis\nfishtail_palm\nwine_palm, jaggery_palm, kitul, kittul, kitul_tree, toddy_palm, Caryota_urens\nwax_palm, Ceroxylon_andicola, Ceroxylon_alpinum\ncoconut, coconut_palm, coco_palm, coco, cocoa_palm, coconut_tree, Cocos_nucifera\ncarnauba, carnauba_palm, wax_palm, Copernicia_prunifera, Copernicia_cerifera\ncaranday, caranda, caranda_palm, wax_palm, Copernicia_australis, Copernicia_alba\ncorozo, corozo_palm\ngebang_palm, Corypha_utan, 
Corypha_gebanga\nlatanier, latanier_palm\ntalipot, talipot_palm, Corypha_umbraculifera\noil_palm\nAfrican_oil_palm, Elaeis_guineensis\nAmerican_oil_palm, Elaeis_oleifera\npalm_nut, palm_kernel\ncabbage_palm, Euterpe_oleracea\ncabbage_palm, cabbage_tree, Livistona_australis\ntrue_sago_palm, Metroxylon_sagu\nnipa_palm, Nipa_fruticans\nbabassu, babassu_palm, coco_de_macao, Orbignya_phalerata, Orbignya_spesiosa, Orbignya_martiana\nbabassu_nut\ncohune_palm, Orbignya_cohune, cohune\ncohune_nut\ndate_palm, Phoenix_dactylifera\nivory_palm, ivory-nut_palm, ivory_plant, Phytelephas_macrocarpa\nraffia_palm, Raffia_farinifera, Raffia_ruffia\nbamboo_palm, Raffia_vinifera\nlady_palm\nminiature_fan_palm, bamboo_palm, fern_rhapis, Rhapis_excelsa\nreed_rhapis, slender_lady_palm, Rhapis_humilis\nroyal_palm, Roystonea_regia\ncabbage_palm, Roystonea_oleracea\ncabbage_palmetto, cabbage_palm, Sabal_palmetto\nsaw_palmetto, scrub_palmetto, Serenoa_repens\nthatch_palm, thatch_tree, silver_thatch, broom_palm, Thrinax_parviflora\nkey_palm, silvertop_palmetto, silver_thatch, Thrinax_microcarpa, Thrinax_morrisii, Thrinax_keyensis\nEnglish_plantain, narrow-leaved_plantain, ribgrass, ribwort, ripple-grass, buckthorn, Plantago_lanceolata\nbroad-leaved_plantain, common_plantain, white-man's_foot, whiteman's_foot, cart-track_plant, Plantago_major\nhoary_plantain, Plantago_media\nfleawort, psyllium, Spanish_psyllium, Plantago_psyllium\nrugel's_plantain, broad-leaved_plantain, Plantago_rugelii\nhoary_plantain, Plantago_virginica\nbuckwheat, Polygonum_fagopyrum, Fagopyrum_esculentum\nprince's-feather, princess_feather, kiss-me-over-the-garden-gate, prince's-plume, Polygonum_orientale\neriogonum\numbrella_plant, Eriogonum_allenii\nwild_buckwheat, California_buckwheat, Erigonum_fasciculatum\nrhubarb, rhubarb_plant\nHimalayan_rhubarb, Indian_rhubarb, red-veined_pie_plant, Rheum_australe, Rheum_emodi\npie_plant, garden_rhubarb, Rheum_cultorum, Rheum_rhabarbarum, Rheum_rhaponticum\nChinese_rhubarb, 
Rheum_palmatum\nsour_dock, garden_sorrel, Rumex_acetosa\nsheep_sorrel, sheep's_sorrel, Rumex_acetosella\nbitter_dock, broad-leaved_dock, yellow_dock, Rumex_obtusifolius\nFrench_sorrel, garden_sorrel, Rumex_scutatus\nyellow-eyed_grass\ncommelina\nspiderwort, dayflower\npineapple, pineapple_plant, Ananas_comosus\npipewort, Eriocaulon_aquaticum\nwater_hyacinth, water_orchid, Eichhornia_crassipes, Eichhornia_spesiosa\nwater_star_grass, mud_plantain, Heteranthera_dubia\nnaiad, water_nymph\nwater_plantain, Alisma_plantago-aquatica\nnarrow-leaved_water_plantain\nhydrilla, Hydrilla_verticillata\nAmerican_frogbit, Limnodium_spongia\nwaterweed\nCanadian_pondweed, Elodea_canadensis\ntape_grass, eelgrass, wild_celery, Vallisneria_spiralis\npondweed\ncurled_leaf_pondweed, curly_pondweed, Potamogeton_crispus\nloddon_pondweed, Potamogeton_nodosus, Potamogeton_americanus\nfrog's_lettuce\narrow_grass, Triglochin_maritima\nhorned_pondweed, Zannichellia_palustris\neelgrass, grass_wrack, sea_wrack, Zostera_marina\nrose, rosebush\nhip, rose_hip, rosehip\nbanksia_rose, Rosa_banksia\ndamask_rose, summer_damask_rose, Rosa_damascena\nsweetbrier, sweetbriar, brier, briar, eglantine, Rosa_eglanteria\nCherokee_rose, Rosa_laevigata\nmusk_rose, Rosa_moschata\nagrimonia, agrimony\nharvest-lice, Agrimonia_eupatoria\nfragrant_agrimony, Agrimonia_procera\nalderleaf_Juneberry, alder-leaved_serviceberry, Amelanchier_alnifolia\nflowering_quince\njaponica, maule's_quince, Chaenomeles_japonica\ncoco_plum, coco_plum_tree, cocoa_plum, icaco, Chrysobalanus_icaco\ncotoneaster\nCotoneaster_dammeri\nCotoneaster_horizontalis\nparsley_haw, parsley-leaved_thorn, Crataegus_apiifolia, Crataegus_marshallii\nscarlet_haw, Crataegus_biltmoreana\nblackthorn, pear_haw, pear_hawthorn, Crataegus_calpodendron, Crataegus_tomentosa\ncockspur_thorn, cockspur_hawthorn, Crataegus_crus-galli\nmayhaw, summer_haw, Crataegus_aestivalis\nred_haw, downy_haw, Crataegus_mollis, Crataegus_coccinea_mollis\nred_haw, Crataegus_pedicellata, 
Crataegus_coccinea\nquince, quince_bush, Cydonia_oblonga\nmountain_avens, Dryas_octopetala\nloquat, loquat_tree, Japanese_medlar, Japanese_plum, Eriobotrya_japonica\nbeach_strawberry, Chilean_strawberry, Fragaria_chiloensis\nVirginia_strawberry, scarlet_strawberry, Fragaria_virginiana\navens\nyellow_avens, Geum_alleppicum_strictum, Geum_strictum\nyellow_avens, Geum_macrophyllum\nprairie_smoke, purple_avens, Geum_triflorum\nbennet, white_avens, Geum_virginianum\ntoyon, tollon, Christmasberry, Christmas_berry, Heteromeles_arbutifolia, Photinia_arbutifolia\napple_tree\napple, orchard_apple_tree, Malus_pumila\nwild_apple, crab_apple, crabapple\ncrab_apple, crabapple, cultivated_crab_apple\nSiberian_crab, Siberian_crab_apple, cherry_apple, cherry_crab, Malus_baccata\nwild_crab, Malus_sylvestris\nAmerican_crab_apple, garland_crab, Malus_coronaria\nOregon_crab_apple, Malus_fusca\nSouthern_crab_apple, flowering_crab, Malus_angustifolia\nIowa_crab, Iowa_crab_apple, prairie_crab, western_crab_apple, Malus_ioensis\nBechtel_crab, flowering_crab\nmedlar, medlar_tree, Mespilus_germanica\ncinquefoil, five-finger\nsilverweed, goose-tansy, goose_grass, Potentilla_anserina\nsalad_burnet, burnet_bloodwort, pimpernel, Poterium_sanguisorba\nplum, plum_tree\nwild_plum, wild_plum_tree\nAllegheny_plum, Alleghany_plum, sloe, Prunus_alleghaniensis\nAmerican_red_plum, August_plum, goose_plum, Prunus_americana\nchickasaw_plum, hog_plum, hog_plum_bush, Prunus_angustifolia\nbeach_plum, beach_plum_bush, Prunus_maritima\ncommon_plum, Prunus_domestica\nbullace, Prunus_insititia\ndamson_plum, damson_plum_tree, Prunus_domestica_insititia\nbig-tree_plum, Prunus_mexicana\nCanada_plum, Prunus_nigra\nplumcot, plumcot_tree\napricot, apricot_tree\nJapanese_apricot, mei, Prunus_mume\ncommon_apricot, Prunus_armeniaca\npurple_apricot, black_apricot, Prunus_dasycarpa\ncherry, cherry_tree\nwild_cherry, wild_cherry_tree\nwild_cherry\nsweet_cherry, Prunus_avium\nheart_cherry, oxheart, oxheart_cherry\ngean, 
mazzard, mazzard_cherry\ncapulin, capulin_tree, Prunus_capuli\ncherry_laurel, laurel_cherry, mock_orange, wild_orange, Prunus_caroliniana\ncherry_plum, myrobalan, myrobalan_plum, Prunus_cerasifera\nsour_cherry, sour_cherry_tree, Prunus_cerasus\namarelle, Prunus_cerasus_caproniana\nmorello, Prunus_cerasus_austera\nmarasca\nalmond_tree\nalmond, sweet_almond, Prunus_dulcis, Prunus_amygdalus, Amygdalus_communis\nbitter_almond, Prunus_dulcis_amara, Amygdalus_communis_amara\njordan_almond\ndwarf_flowering_almond, Prunus_glandulosa\nholly-leaved_cherry, holly-leaf_cherry, evergreen_cherry, islay, Prunus_ilicifolia\nfuji, fuji_cherry, Prunus_incisa\nflowering_almond, oriental_bush_cherry, Prunus_japonica\ncherry_laurel, laurel_cherry, Prunus_laurocerasus\nCatalina_cherry, Prunus_lyonii\nbird_cherry, bird_cherry_tree\nhagberry_tree, European_bird_cherry, common_bird_cherry, Prunus_padus\nhagberry\npin_cherry, Prunus_pensylvanica\npeach, peach_tree, Prunus_persica\nnectarine, nectarine_tree, Prunus_persica_nectarina\nsand_cherry, Prunus_pumila, Prunus_pumilla_susquehanae, Prunus_susquehanae, Prunus_cuneata\nJapanese_plum, Prunus_salicina\nblack_cherry, black_cherry_tree, rum_cherry, Prunus_serotina\nflowering_cherry\noriental_cherry, Japanese_cherry, Japanese_flowering_cherry, Prunus_serrulata\nJapanese_flowering_cherry, Prunus_sieboldii\nSierra_plum, Pacific_plum, Prunus_subcordata\nrosebud_cherry, winter_flowering_cherry, Prunus_subhirtella\nRussian_almond, dwarf_Russian_almond, Prunus_tenella\nflowering_almond, Prunus_triloba\nchokecherry, chokecherry_tree, Prunus_virginiana\nchokecherry\nwestern_chokecherry, Prunus_virginiana_demissa, Prunus_demissa\nPyracantha, pyracanth, fire_thorn, firethorn\npear, pear_tree, Pyrus_communis\nfruit_tree\nbramble_bush\nlawyerbush, lawyer_bush, bush_lawyer, Rubus_cissoides, Rubus_australis\nstone_bramble, Rubus_saxatilis\nsand_blackberry, Rubus_cuneifolius\nboysenberry, boysenberry_bush\nloganberry, Rubus_loganobaccus, 
Rubus_ursinus_loganobaccus\nAmerican_dewberry, Rubus_canadensis\nNorthern_dewberry, American_dewberry, Rubus_flagellaris\nSouthern_dewberry, Rubus_trivialis\nswamp_dewberry, swamp_blackberry, Rubus_hispidus\nEuropean_dewberry, Rubus_caesius\nraspberry, raspberry_bush\nwild_raspberry, European_raspberry, framboise, Rubus_idaeus\nAmerican_raspberry, Rubus_strigosus, Rubus_idaeus_strigosus\nblack_raspberry, blackcap, blackcap_raspberry, thimbleberry, Rubus_occidentalis\nsalmonberry, Rubus_spectabilis\nsalmonberry, salmon_berry, thimbleberry, Rubus_parviflorus\nwineberry, Rubus_phoenicolasius\nmountain_ash\nrowan, rowan_tree, European_mountain_ash, Sorbus_aucuparia\nrowanberry\nAmerican_mountain_ash, Sorbus_americana\nWestern_mountain_ash, Sorbus_sitchensis\nservice_tree, sorb_apple, sorb_apple_tree, Sorbus_domestica\nwild_service_tree, Sorbus_torminalis\nspirea, spiraea\nbridal_wreath, bridal-wreath, Saint_Peter's_wreath, St._Peter's_wreath, Spiraea_prunifolia\nmadderwort, rubiaceous_plant\nIndian_madder, munjeet, Rubia_cordifolia\nmadder, Rubia_tinctorum\nwoodruff\ndagame, lemonwood_tree, Calycophyllum_candidissimum\nblolly, West_Indian_snowberry, Chiococca_alba\ncoffee, coffee_tree\nArabian_coffee, Coffea_arabica\nLiberian_coffee, Coffea_liberica\nrobusta_coffee, Rio_Nunez_coffee, Coffea_robusta, Coffea_canephora\ncinchona, chinchona\nCartagena_bark, Cinchona_cordifolia, Cinchona_lancifolia\ncalisaya, Cinchona_officinalis, Cinchona_ledgeriana, Cinchona_calisaya\ncinchona_tree, Cinchona_pubescens\ncinchona, cinchona_bark, Peruvian_bark, Jesuit's_bark\nbedstraw\nsweet_woodruff, waldmeister, woodruff, fragrant_bedstraw, Galium_odoratum, Asperula_odorata\nNorthern_bedstraw, Northern_snow_bedstraw, Galium_boreale\nyellow_bedstraw, yellow_cleavers, Our_Lady's_bedstraw, Galium_verum\nwild_licorice, Galium_lanceolatum\ncleavers, clivers, goose_grass, catchweed, spring_cleavers, Galium_aparine\nwild_madder, white_madder, white_bedstraw, infant's-breath, false_baby's_breath, 
Galium_mollugo\ncape_jasmine, cape_jessamine, Gardenia_jasminoides, Gardenia_augusta\ngenipa\ngenipap_fruit, jagua, marmalade_box, Genipa_Americana\nhamelia\nscarlet_bush, scarlet_hamelia, coloradillo, Hamelia_patens, Hamelia_erecta\nlemonwood, lemon-wood, lemonwood_tree, lemon-wood_tree, Psychotria_capensis\nnegro_peach, Sarcocephalus_latifolius, Sarcocephalus_esculentus\nwild_medlar, wild_medlar_tree, medlar, Vangueria_infausta\nSpanish_tamarind, Vangueria_madagascariensis\nabelia\nbush_honeysuckle, Diervilla_sessilifolia\nAmerican_twinflower, Linnaea_borealis_americana\nhoneysuckle\nAmerican_fly_honeysuckle, fly_honeysuckle, Lonicera_canadensis\nItalian_honeysuckle, Italian_woodbine, Lonicera_caprifolium\nyellow_honeysuckle, Lonicera_flava\nhairy_honeysuckle, Lonicera_hirsuta\nJapanese_honeysuckle, Lonicera_japonica\nHall's_honeysuckle, Lonicera_japonica_halliana\nMorrow's_honeysuckle, Lonicera_morrowii\nwoodbine, Lonicera_periclymenum\ntrumpet_honeysuckle, coral_honeysuckle, trumpet_flower, trumpet_vine, Lonicera_sempervirens\nEuropean_fly_honeysuckle, European_honeysuckle, Lonicera_xylosteum\nswamp_fly_honeysuckle\nsnowberry, common_snowberry, waxberry, Symphoricarpos_alba\ncoralberry, Indian_currant, Symphoricarpos_orbiculatus\nblue_elder, blue_elderberry, Sambucus_caerulea\ndwarf_elder, danewort, Sambucus_ebulus\nAmerican_red_elder, red-berried_elder, stinking_elder, Sambucus_pubens\nEuropean_red_elder, red-berried_elder, Sambucus_racemosa\nfeverroot, horse_gentian, tinker's_root, wild_coffee, Triostium_perfoliatum\ncranberry_bush, cranberry_tree, American_cranberry_bush, highbush_cranberry, Viburnum_trilobum\nwayfaring_tree, twist_wood, twistwood, Viburnum_lantana\nguelder_rose, European_cranberrybush, European_cranberry_bush, crampbark, cranberry_tree, Viburnum_opulus\narrow_wood, Viburnum_recognitum\nblack_haw, Viburnum_prunifolium\nweigela, Weigela_florida\nteasel, teazel, teasle\ncommon_teasel, Dipsacus_fullonum\nfuller's_teasel, 
Dipsacus_sativus\nwild_teasel, Dipsacus_sylvestris\nscabious, scabiosa\nsweet_scabious, pincushion_flower, mournful_widow, Scabiosa_atropurpurea\nfield_scabious, Scabiosa_arvensis\njewelweed, lady's_earrings, orange_balsam, celandine, touch-me-not, Impatiens_capensis\ngeranium\ncranesbill, crane's_bill\nwild_geranium, spotted_cranesbill, Geranium_maculatum\nmeadow_cranesbill, Geranium_pratense\nRichardson's_geranium, Geranium_richardsonii\nherb_robert, herbs_robert, herb_roberts, Geranium_robertianum\nsticky_geranium, Geranium_viscosissimum\ndove's_foot_geranium, Geranium_molle\nrose_geranium, sweet-scented_geranium, Pelargonium_graveolens\nfish_geranium, bedding_geranium, zonal_pelargonium, Pelargonium_hortorum\nivy_geranium, ivy-leaved_geranium, hanging_geranium, Pelargonium_peltatum\napple_geranium, nutmeg_geranium, Pelargonium_odoratissimum\nlemon_geranium, Pelargonium_limoneum\nstorksbill, heron's_bill\nmusk_clover, muskus_grass, white-stemmed_filaree, Erodium_moschatum\nincense_tree\nelephant_tree, Bursera_microphylla\ngumbo-limbo, Bursera_simaruba\nBoswellia_carteri\nsalai, Boswellia_serrata\nbalm_of_gilead, Commiphora_meccanensis\nmyrrh_tree, Commiphora_myrrha\nProtium_heptaphyllum\nProtium_guianense\nwater_starwort\nbarbados_cherry, acerola, Surinam_cherry, West_Indian_cherry, Malpighia_glabra\nmahogany, mahogany_tree\nchinaberry, chinaberry_tree, China_tree, Persian_lilac, pride-of-India, azederach, azedarach, Melia_azederach, Melia_azedarach\nneem, neem_tree, nim_tree, margosa, arishth, Azadirachta_indica, Melia_Azadirachta\nneem_seed\nSpanish_cedar, Spanish_cedar_tree, Cedrela_odorata\nsatinwood, satinwood_tree, Chloroxylon_swietenia\nAfrican_scented_mahogany, cedar_mahogany, sapele_mahogany, Entandrophragma_cylindricum\nsilver_ash\nnative_beech, flindosa, flindosy, Flindersia_australis\nbunji-bunji, Flindersia_schottiana\nAfrican_mahogany\nlanseh_tree, langsat, langset, Lansium_domesticum\ntrue_mahogany, Cuban_mahogany, Dominican_mahogany, 
Swietinia_mahogani\nHonduras_mahogany, Swietinia_macrophylla\nPhilippine_mahogany, Philippine_cedar, kalantas, Toona_calantas, Cedrela_calantas\ncaracolito, Ruptiliocarpon_caracolito\ncommon_wood_sorrel, cuckoo_bread, shamrock, Oxalis_acetosella\nBermuda_buttercup, English-weed, Oxalis_pes-caprae, Oxalis_cernua\ncreeping_oxalis, creeping_wood_sorrel, Oxalis_corniculata\ngoatsfoot, goat's_foot, Oxalis_caprina\nviolet_wood_sorrel, Oxalis_violacea\noca, oka, Oxalis_tuberosa, Oxalis_crenata\ncarambola, carambola_tree, Averrhoa_carambola\nbilimbi, Averrhoa_bilimbi\nmilkwort\nsenega, Polygala_alba\norange_milkwort, yellow_milkwort, candyweed, yellow_bachelor's_button, Polygala_lutea\nflowering_wintergreen, gaywings, bird-on-the-wing, fringed_polygala, Polygala_paucifolia\nSeneca_snakeroot, Seneka_snakeroot, senga_root, senega_root, senega_snakeroot, Polygala_senega\ncommon_milkwort, gand_flower, Polygala_vulgaris\nrue, herb_of_grace, Ruta_graveolens\ncitrus, citrus_tree\norange, orange_tree\nsour_orange, Seville_orange, bitter_orange, bitter_orange_tree, bigarade, marmalade_orange, Citrus_aurantium\nbergamot, bergamot_orange, Citrus_bergamia\npomelo, pomelo_tree, pummelo, shaddock, Citrus_maxima, Citrus_grandis, Citrus_decumana\ncitron, citron_tree, Citrus_medica\ngrapefruit, Citrus_paradisi\nmandarin, mandarin_orange, mandarin_orange_tree, Citrus_reticulata\ntangerine, tangerine_tree\nclementine, clementine_tree\nsatsuma, satsuma_tree\nsweet_orange, sweet_orange_tree, Citrus_sinensis\ntemple_orange, temple_orange_tree, tangor, king_orange, Citrus_nobilis\ntangelo, tangelo_tree, ugli_fruit, Citrus_tangelo\nrangpur, rangpur_lime, lemanderin, Citrus_limonia\nlemon, lemon_tree, Citrus_limon\nsweet_lemon, sweet_lime, Citrus_limetta\nlime, lime_tree, Citrus_aurantifolia\ncitrange, citrange_tree, Citroncirus_webberi\nfraxinella, dittany, burning_bush, gas_plant, Dictamnus_alba\nkumquat, cumquat, kumquat_tree\nmarumi, marumi_kumquat, round_kumquat, Fortunella_japonica\nnagami, 
nagami_kumquat, oval_kumquat, Fortunella_margarita\ncork_tree, Phellodendron_amurense\ntrifoliate_orange, trifoliata, wild_orange, Poncirus_trifoliata\nprickly_ash\ntoothache_tree, sea_ash, Zanthoxylum_americanum, Zanthoxylum_fraxineum\nHercules'-club, Hercules'-clubs, Hercules-club, Zanthoxylum_clava-herculis\nbitterwood_tree\nmarupa, Simarouba_amara\nparadise_tree, bitterwood, Simarouba_glauca\nailanthus\ntree_of_heaven, tree_of_the_gods, Ailanthus_altissima\nwild_mango, dika, wild_mango_tree, Irvingia_gabonensis\npepper_tree, Kirkia_wilmsii\nJamaica_quassia, bitterwood, Picrasma_excelsa, Picrasma_excelsum\nquassia, bitterwood, Quassia_amara\nnasturtium\ngarden_nasturtium, Indian_cress, Tropaeolum_majus\nbush_nasturtium, Tropaeolum_minus\ncanarybird_flower, canarybird_vine, canary_creeper, Tropaeolum_peregrinum\nbean_caper, Syrian_bean_caper, Zygophyllum_fabago\npalo_santo, Bulnesia_sarmienti\nlignum_vitae, Guaiacum_officinale\ncreosote_bush, coville, hediondilla, Larrea_tridentata\ncaltrop, devil's_weed, Tribulus_terestris\nwillow, willow_tree\nosier\nwhite_willow, Huntingdon_willow, Salix_alba\nsilver_willow, silky_willow, Salix_alba_sericea, Salix_sericea\ngolden_willow, Salix_alba_vitellina, Salix_vitellina\ncricket-bat_willow, Salix_alba_caerulea\narctic_willow, Salix_arctica\nweeping_willow, Babylonian_weeping_willow, Salix_babylonica\nWisconsin_weeping_willow, Salix_pendulina, Salix_blanda, Salix_pendulina_blanda\npussy_willow, Salix_discolor\nsallow\ngoat_willow, florist's_willow, pussy_willow, Salix_caprea\npeachleaf_willow, peach-leaved_willow, almond-leaves_willow, Salix_amygdaloides\nalmond_willow, black_Hollander, Salix_triandra, Salix_amygdalina\nhoary_willow, sage_willow, Salix_candida\ncrack_willow, brittle_willow, snap_willow, Salix_fragilis\nprairie_willow, Salix_humilis\ndwarf_willow, Salix_herbacea\ngrey_willow, gray_willow, Salix_cinerea\narroyo_willow, Salix_lasiolepis\nshining_willow, Salix_lucida\nswamp_willow, black_willow, 
Salix_nigra\nbay_willow, laurel_willow, Salix_pentandra\npurple_willow, red_willow, red_osier, basket_willow, purple_osier, Salix_purpurea\nbalsam_willow, Salix_pyrifolia\ncreeping_willow, Salix_repens\nSitka_willow, silky_willow, Salix_sitchensis\ndwarf_grey_willow, dwarf_gray_willow, sage_willow, Salix_tristis\nbearberry_willow, Salix_uva-ursi\ncommon_osier, hemp_willow, velvet_osier, Salix_viminalis\npoplar, poplar_tree\nbalsam_poplar, hackmatack, tacamahac, Populus_balsamifera\nwhite_poplar, white_aspen, abele, aspen_poplar, silver-leaved_poplar, Populus_alba\ngrey_poplar, gray_poplar, Populus_canescens\nblack_poplar, Populus_nigra\nLombardy_poplar, Populus_nigra_italica\ncottonwood\nEastern_cottonwood, necklace_poplar, Populus_deltoides\nblack_cottonwood, Western_balsam_poplar, Populus_trichocarpa\nswamp_cottonwood, black_cottonwood, downy_poplar, swamp_poplar, Populus_heterophylla\naspen\nquaking_aspen, European_quaking_aspen, Populus_tremula\nAmerican_quaking_aspen, American_aspen, Populus_tremuloides\nCanadian_aspen, bigtooth_aspen, bigtoothed_aspen, big-toothed_aspen, large-toothed_aspen, large_tooth_aspen, Populus_grandidentata\nsandalwood_tree, true_sandalwood, Santalum_album\nquandong, quandang, quandong_tree, Eucarya_acuminata, Fusanus_acuminatus\nrabbitwood, buffalo_nut, Pyrularia_pubera\nLoranthaceae, family_Loranthaceae, mistletoe_family\nmistletoe, Loranthus_europaeus\nAmerican_mistletoe, Arceuthobium_pusillum\nmistletoe, Viscum_album, Old_World_mistletoe\nAmerican_mistletoe, Phoradendron_serotinum, Phoradendron_flavescens\naalii\nsoapberry, soapberry_tree\nwild_China_tree, Sapindus_drumondii, Sapindus_marginatus\nChina_tree, false_dogwood, jaboncillo, chinaberry, Sapindus_saponaria\nakee, akee_tree, Blighia_sapida\nsoapberry_vine\nheartseed, Cardiospermum_grandiflorum\nballoon_vine, heart_pea, Cardiospermum_halicacabum\nlongan, lungen, longanberry, Dimocarpus_longan, Euphorbia_litchi, Nephelium_longana\nharpullia\nharpulla, 
Harpullia_cupanioides\nMoreton_Bay_tulipwood, Harpullia_pendula\nlitchi, lichee, litchi_tree, Litchi_chinensis, Nephelium_litchi\nSpanish_lime, Spanish_lime_tree, honey_berry, mamoncillo, genip, ginep, Melicocca_bijuga, Melicocca_bijugatus\nrambutan, rambotan, rambutan_tree, Nephelium_lappaceum\npulasan, pulassan, pulasan_tree, Nephelium_mutabile\npachysandra\nAllegheny_spurge, Allegheny_mountain_spurge, Pachysandra_procumbens\nbittersweet, American_bittersweet, climbing_bittersweet, false_bittersweet, staff_vine, waxwork, shrubby_bittersweet, Celastrus_scandens\nspindle_tree, spindleberry, spindleberry_tree\nwinged_spindle_tree, Euonymous_alatus\nwahoo, burning_bush, Euonymus_atropurpureus\nstrawberry_bush, wahoo, Euonymus_americanus\nevergreen_bittersweet, Euonymus_fortunei_radicans, Euonymus_radicans_vegetus\ncyrilla, leatherwood, white_titi, Cyrilla_racemiflora\ntiti, buckwheat_tree, Cliftonia_monophylla\ncrowberry\nmaple\nsilver_maple, Acer_saccharinum\nsugar_maple, rock_maple, Acer_saccharum\nred_maple, scarlet_maple, swamp_maple, Acer_rubrum\nmoosewood, moose-wood, striped_maple, striped_dogwood, goosefoot_maple, Acer_pennsylvanicum\nOregon_maple, big-leaf_maple, Acer_macrophyllum\ndwarf_maple, Rocky-mountain_maple, Acer_glabrum\nmountain_maple, mountain_alder, Acer_spicatum\nvine_maple, Acer_circinatum\nhedge_maple, field_maple, Acer_campestre\nNorway_maple, Acer_platanoides\nsycamore, great_maple, scottish_maple, Acer_pseudoplatanus\nbox_elder, ash-leaved_maple, Acer_negundo\nCalifornia_box_elder, Acer_negundo_Californicum\npointed-leaf_maple, Acer_argutum\nJapanese_maple, full_moon_maple, Acer_japonicum\nJapanese_maple, Acer_palmatum\nholly\nChinese_holly, Ilex_cornuta\nbearberry, possum_haw, winterberry, Ilex_decidua\ninkberry, gallberry, gall-berry, evergreen_winterberry, Ilex_glabra\nmate, Paraguay_tea, Ilex_paraguariensis\nAmerican_holly, 
Christmas_holly\nlow_gallberry_holly\ntall_gallberry_holly\nyaupon_holly\ndeciduous_holly\njuneberry_holly\nlargeleaf_holly\nGeogia_holly\ncommon_winterberry_holly\nsmooth_winterberry_holly\ncashew, cashew_tree, Anacardium_occidentale\ngoncalo_alves, Astronium_fraxinifolium\nVenetian_sumac, wig_tree, Cotinus_coggygria\nlaurel_sumac, Malosma_laurina, Rhus_laurina\nmango, mango_tree, Mangifera_indica\npistachio, Pistacia_vera, pistachio_tree\nterebinth, Pistacia_terebinthus\nmastic, mastic_tree, lentisk, Pistacia_lentiscus\nAustralian_sumac, Rhodosphaera_rhodanthema, Rhus_rhodanthema\nsumac, sumach, shumac\nsmooth_sumac, scarlet_sumac, vinegar_tree, Rhus_glabra\nsugar-bush, sugar_sumac, Rhus_ovata\nstaghorn_sumac, velvet_sumac, Virginian_sumac, vinegar_tree, Rhus_typhina\nsquawbush, squaw-bush, skunkbush, Rhus_trilobata\naroeira_blanca, Schinus_chichita\npepper_tree, molle, Peruvian_mastic_tree, Schinus_molle\nBrazilian_pepper_tree, Schinus_terebinthifolius\nhog_plum, yellow_mombin, yellow_mombin_tree, Spondias_mombin\nmombin, mombin_tree, jocote, Spondias_purpurea\npoison_ash, poison_dogwood, poison_sumac, Toxicodendron_vernix, Rhus_vernix\npoison_ivy, markweed, poison_mercury, poison_oak, Toxicodendron_radicans, Rhus_radicans\nwestern_poison_oak, Toxicodendron_diversilobum, Rhus_diversiloba\neastern_poison_oak, Toxicodendron_quercifolium, Rhus_quercifolia, Rhus_toxicodenedron\nvarnish_tree, lacquer_tree, Chinese_lacquer_tree, Japanese_lacquer_tree, Japanese_varnish_tree, Japanese_sumac, Toxicodendron_vernicifluum, Rhus_verniciflua\nhorse_chestnut, buckeye, Aesculus_hippocastanum\nbuckeye, horse_chestnut, conker\nsweet_buckeye\nOhio_buckeye\ndwarf_buckeye, bottlebrush_buckeye\nred_buckeye\nparticolored_buckeye\nebony, ebony_tree, Diospyros_ebenum\nmarblewood, marble-wood, Andaman_marble, Diospyros_kurzii\nmarblewood, marble-wood\npersimmon, persimmon_tree\nJapanese_persimmon, kaki, Diospyros_kaki\nAmerican_persimmon, possumwood, Diospyros_virginiana\ndate_plum, 
Diospyros_lotus\nbuckthorn\nsouthern_buckthorn, shittimwood, shittim, mock_orange, Bumelia_lycioides\nfalse_buckthorn, chittamwood, chittimwood, shittimwood, black_haw, Bumelia_lanuginosa\nstar_apple, caimito, Chrysophyllum_cainito\nsatinleaf, satin_leaf, caimitillo, damson_plum, Chrysophyllum_oliviforme\nbalata, balata_tree, beefwood, bully_tree, Manilkara_bidentata\nsapodilla, sapodilla_tree, Manilkara_zapota, Achras_zapota\ngutta-percha_tree, Palaquium_gutta\ngutta-percha_tree\ncanistel, canistel_tree, Pouteria_campechiana_nervosa\nmarmalade_tree, mammee, sapote, Pouteria_zapota, Calocarpum_zapota\nsweetleaf, Symplocus_tinctoria\nAsiatic_sweetleaf, sapphire_berry, Symplocus_paniculata\nstyrax\nsnowbell, Styrax_obassia\nJapanese_snowbell, Styrax_japonicum\nTexas_snowbell, Texas_snowbells, Styrax_texana\nsilver-bell_tree, silverbell_tree, snowdrop_tree, opossum_wood, Halesia_carolina, Halesia_tetraptera\ncarnivorous_plant\npitcher_plant\ncommon_pitcher_plant, huntsman's_cup, huntsman's_cups, Sarracenia_purpurea\nhooded_pitcher_plant, Sarracenia_minor\nhuntsman's_horn, huntsman's_horns, yellow_trumpet, yellow_pitcher_plant, trumpets, Sarracenia_flava\ntropical_pitcher_plant\nsundew, sundew_plant, daily_dew\nVenus's_flytrap, Venus's_flytraps, Dionaea_muscipula\nwaterwheel_plant, Aldrovanda_vesiculosa\nDrosophyllum_lusitanicum\nroridula\nAustralian_pitcher_plant, Cephalotus_follicularis\nsedum\nstonecrop\nrose-root, midsummer-men, Sedum_rosea\norpine, orpin, livelong, live-forever, Sedum_telephium\npinwheel, Aeonium_haworthii\nChristmas_bush, Christmas_tree, Ceratopetalum_gummiferum\nhortensia, Hydrangea_macrophylla_hortensis\nfall-blooming_hydrangea, Hydrangea_paniculata\ncarpenteria, Carpenteria_californica\ndecumary, Decumaria_barbata, Decumaria_barbara\ndeutzia\nphiladelphus\nmock_orange, syringa, Philadelphus_coronarius\nsaxifrage, breakstone, rockfoil\nyellow_mountain_saxifrage, Saxifraga_aizoides\nmeadow_saxifrage, fair-maids-of-France, 
Saxifraga_granulata\nmossy_saxifrage, Saxifraga_hypnoides\nwestern_saxifrage, Saxifraga_occidentalis\npurple_saxifrage, Saxifraga_oppositifolia\nstar_saxifrage, starry_saxifrage, Saxifraga_stellaris\nstrawberry_geranium, strawberry_saxifrage, mother-of-thousands, Saxifraga_stolonifera, Saxifraga_sarmentosam\nastilbe\nfalse_goatsbeard, Astilbe_biternata\ndwarf_astilbe, Astilbe_chinensis_pumila\nspirea, spiraea, Astilbe_japonica\nbergenia\ncoast_boykinia, Boykinia_elata, Boykinia_occidentalis\ngolden_saxifrage, golden_spleen\numbrella_plant, Indian_rhubarb, Darmera_peltata, Peltiphyllum_peltatum\nbridal_wreath, bridal-wreath, Francoa_ramosa\nalumroot, alumbloom\ncoralbells, Heuchera_sanguinea\nleatherleaf_saxifrage, Leptarrhena_pyrolifolia\nwoodland_star, Lithophragma_affine, Lithophragma_affinis, Tellima_affinis\nprairie_star, Lithophragma_parviflorum\nmiterwort, mitrewort, bishop's_cap\nfive-point_bishop's_cap, Mitella_pentandra\nparnassia, grass-of-Parnassus\nbog_star, Parnassia_palustris\nfringed_grass_of_Parnassus, Parnassia_fimbriata\nfalse_alumroot, fringe_cups, Tellima_grandiflora\nfoamflower, coolwart, false_miterwort, false_mitrewort, Tiarella_cordifolia\nfalse_miterwort, false_mitrewort, Tiarella_unifoliata\npickaback_plant, piggyback_plant, youth-on-age, Tolmiea_menziesii\ncurrant, currant_bush\nblack_currant, European_black_currant, Ribes_nigrum\nwhite_currant, Ribes_sativum\ngooseberry, gooseberry_bush, Ribes_uva-crispa, Ribes_grossularia\nplane_tree, sycamore, platan\nLondon_plane, Platanus_acerifolia\nAmerican_sycamore, American_plane, buttonwood, Platanus_occidentalis\noriental_plane, Platanus_orientalis\nCalifornia_sycamore, Platanus_racemosa\nArizona_sycamore, Platanus_wrightii\nGreek_valerian, Polemonium_reptans\nnorthern_Jacob's_ladder, Polemonium_boreale\nskunkweed, skunk-weed, Polemonium_viscosum\nphlox\nmoss_pink, mountain_phlox, moss_phlox, dwarf_phlox, Phlox_subulata\nevening-snow, Linanthus_dichotomus\nacanthus\nbear's_breech, 
bear's_breeches, sea_holly, Acanthus_mollis\ncaricature_plant, Graptophyllum_pictum\nblack-eyed_Susan, black-eyed_Susan_vine, Thunbergia_alata\ncatalpa, Indian_bean\nCatalpa_bignioides\nCatalpa_speciosa\ndesert_willow, Chilopsis_linearis\ncalabash, calabash_tree, Crescentia_cujete\ncalabash\nborage, tailwort, Borago_officinalis\ncommon_amsinckia, Amsinckia_intermedia\nanchusa\nbugloss, alkanet, Anchusa_officinalis\ncape_forget-me-not, Anchusa_capensis\ncape_forget-me-not, Anchusa_riparia\nSpanish_elm, Equador_laurel, salmwood, cypre, princewood, Cordia_alliodora\nprincewood, Spanish_elm, Cordia_gerascanthus\nChinese_forget-me-not, Cynoglossum_amabile\nhound's-tongue, Cynoglossum_officinale\nhound's-tongue, Cynoglossum_virginaticum\nblueweed, blue_devil, blue_thistle, viper's_bugloss, Echium_vulgare\nbeggar's_lice, beggar_lice\ngromwell, Lithospermum_officinale\npuccoon, Lithospermum_caroliniense\nVirginia_bluebell, Virginia_cowslip, Mertensia_virginica\ngarden_forget-me-not, Myosotis_sylvatica\nforget-me-not, mouse_ear, Myosotis_scorpiodes\nfalse_gromwell\ncomfrey, cumfrey\ncommon_comfrey, boneset, Symphytum_officinale\nconvolvulus\nbindweed\nfield_bindweed, wild_morning-glory, Convolvulus_arvensis\nscammony, Convolvulus_scammonia\nsilverweed\ndodder\ndichondra, Dichondra_micrantha\ncypress_vine, star-glory, Indian_pink, Ipomoea_quamoclit, Quamoclit_pennata\nmoonflower, belle_de_nuit, Ipomoea_alba\nwild_potato_vine, wild_sweet_potato_vine, man-of-the-earth, manroot, scammonyroot, Ipomoea_panurata, Ipomoea_fastigiata\nred_morning-glory, star_ipomoea, Ipomoea_coccinea\nman-of-the-earth, Ipomoea_leptophylla\nscammony, Ipomoea_orizabensis\nJapanese_morning_glory, Ipomoea_nil\nimperial_Japanese_morning_glory, Ipomoea_imperialis\ngesneriad\ngesneria\nachimenes, hot_water_plant\naeschynanthus\nlace-flower_vine, Alsobia_dianthiflora, Episcia_dianthiflora\ncolumnea\nepiscia\ngloxinia\nCanterbury_bell, Gloxinia_perennis\nkohleria\nAfrican_violet, 
Saintpaulia_ionantha\nstreptocarpus\nCape_primrose\nwaterleaf\nVirginia_waterleaf, Shawnee_salad, shawny, Indian_salad, John's_cabbage, Hydrophyllum_virginianum\nyellow_bells, California_yellow_bells, whispering_bells, Emmanthe_penduliflora\nyerba_santa, Eriodictyon_californicum\nnemophila\nbaby_blue-eyes, Nemophila_menziesii\nfive-spot, Nemophila_maculata\nscorpionweed, scorpion_weed, phacelia\nCalifornia_bluebell, Phacelia_campanularia\nCalifornia_bluebell, whitlavia, Phacelia_minor, Phacelia_whitlavia\nfiddleneck, Phacelia_tanacetifolia\nfiesta_flower, Pholistoma_auritum, Nemophila_aurita\nbasil_thyme, basil_balm, mother_of_thyme, Acinos_arvensis, Satureja_acinos\ngiant_hyssop\nyellow_giant_hyssop, Agastache_nepetoides\nanise_hyssop, Agastache_foeniculum\nMexican_hyssop, Agastache_mexicana\nbugle, bugleweed\ncreeping_bugle, Ajuga_reptans\nerect_bugle, blue_bugle, Ajuga_genevensis\npyramid_bugle, Ajuga_pyramidalis\nwood_mint\nhairy_wood_mint, Blephilia_hirsuta\ndowny_wood_mint, Blephilia_celiata\ncalamint\ncommon_calamint, Calamintha_sylvatica, Satureja_calamintha_officinalis\nlarge-flowered_calamint, Calamintha_grandiflora, Clinopodium_grandiflorum, Satureja_grandiflora\nlesser_calamint, field_balm, Calamintha_nepeta, Calamintha_nepeta_glantulosa, Satureja_nepeta, Satureja_calamintha_glandulosa\nwild_basil, cushion_calamint, Clinopodium_vulgare, Satureja_vulgaris\nhorse_balm, horseweed, stoneroot, stone-root, richweed, stone_root, Collinsonia_canadensis\ncoleus, flame_nettle\ncountry_borage, Coleus_aromaticus, Coleus_amboinicus, Plectranthus_amboinicus\npainted_nettle, Joseph's_coat, Coleus_blumei, Solenostemon_blumei, Solenostemon_scutellarioides\nApalachicola_rosemary, Conradina_glabra\ndragonhead, dragon's_head, Dracocephalum_parviflorum\nelsholtzia\nhemp_nettle, dead_nettle, Galeopsis_tetrahit\nground_ivy, alehoof, field_balm, gill-over-the-ground, runaway_robin, Glechoma_hederaceae, Nepeta_hederaceae\npennyroyal, American_pennyroyal, 
Hedeoma_pulegioides\nhyssop, Hyssopus_officinalis\ndead_nettle\nwhite_dead_nettle, Lamium_album\nhenbit, Lamium_amplexicaule\nEnglish_lavender, Lavandula_angustifolia, Lavandula_officinalis\nFrench_lavender, Lavandula_stoechas\nspike_lavender, French_lavender, Lavandula_latifolia\ndagga, Cape_dagga, red_dagga, wilde_dagga, Leonotis_leonurus\nlion's-ear, Leonotis_nepetaefolia, Leonotis_nepetifolia\nmotherwort, Leonurus_cardiaca\npitcher_sage, Lepechinia_calycina, Sphacele_calycina\nbugleweed, Lycopus_virginicus\nwater_horehound, Lycopus_americanus\ngipsywort, gypsywort, Lycopus_europaeus\noriganum\noregano, marjoram, pot_marjoram, wild_marjoram, winter_sweet, Origanum_vulgare\nsweet_marjoram, knotted_marjoram, Origanum_majorana, Majorana_hortensis\nhorehound\ncommon_horehound, white_horehound, Marrubium_vulgare\nlemon_balm, garden_balm, sweet_balm, bee_balm, beebalm, Melissa_officinalis\ncorn_mint, field_mint, Mentha_arvensis\nwater-mint, water_mint, Mentha_aquatica\nbergamot_mint, lemon_mint, eau_de_cologne_mint, Mentha_citrata\nhorsemint, Mentha_longifolia\npeppermint, Mentha_piperita\nspearmint, Mentha_spicata\napple_mint, applemint, Mentha_rotundifolia, Mentha_suaveolens\npennyroyal, Mentha_pulegium\nyerba_buena, Micromeria_chamissonis, Micromeria_douglasii, Satureja_douglasii\nmolucca_balm, bells_of_Ireland, Molucella_laevis\nmonarda, wild_bergamot\nbee_balm, beebalm, bergamot_mint, oswego_tea, Monarda_didyma\nhorsemint, Monarda_punctata\nbee_balm, beebalm, Monarda_fistulosa\nlemon_mint, horsemint, Monarda_citriodora\nplains_lemon_monarda, Monarda_pectinata\nbasil_balm, Monarda_clinopodia\nmustang_mint, Monardella_lanceolata\ncatmint, catnip, Nepeta_cataria\nbasil\nbeefsteak_plant, Perilla_frutescens_crispa\nphlomis\nJerusalem_sage, Phlomis_fruticosa\nphysostegia\nplectranthus\npatchouli, patchouly, pachouli, Pogostemon_cablin\nself-heal, heal_all, Prunella_vulgaris\nmountain_mint\nrosemary, Rosmarinus_officinalis\nclary_sage, Salvia_clarea\npurple_sage, 
chaparral_sage, Salvia_leucophylla\ncancerweed, cancer_weed, Salvia_lyrata\ncommon_sage, ramona, Salvia_officinalis\nmeadow_clary, Salvia_pratensis\nclary, Salvia_sclarea\npitcher_sage, Salvia_spathacea\nMexican_mint, Salvia_divinorum\nwild_sage, wild_clary, vervain_sage, Salvia_verbenaca\nsavory\nsummer_savory, Satureja_hortensis, Satureia_hortensis\nwinter_savory, Satureja_montana, Satureia_montana\nskullcap, helmetflower\nblue_pimpernel, blue_skullcap, mad-dog_skullcap, mad-dog_weed, Scutellaria_lateriflora\nhedge_nettle, dead_nettle, Stachys_sylvatica\nhedge_nettle, Stachys_palustris\ngermander\nAmerican_germander, wood_sage, Teucrium_canadense\ncat_thyme, marum, Teucrium_marum\nwood_sage, Teucrium_scorodonia\nthyme\ncommon_thyme, Thymus_vulgaris\nwild_thyme, creeping_thyme, Thymus_serpyllum\nblue_curls\nturpentine_camphor_weed, camphorweed, vinegarweed, Trichostema_lanceolatum\nbastard_pennyroyal, Trichostema_dichotomum\nbladderwort\nbutterwort\ngenlisea\nmartynia, Martynia_annua\ncommon_unicorn_plant, devil's_claw, common_devil's_claw, elephant-tusk, proboscis_flower, ram's_horn, Proboscidea_louisianica\nsand_devil's_claw, Proboscidea_arenaria, Martynia_arenaria\nsweet_unicorn_plant, Proboscidea_fragrans, Martynia_fragrans\nfigwort\nsnapdragon\nwhite_snapdragon, Antirrhinum_coulterianum\nyellow_twining_snapdragon, Antirrhinum_filipes\nMediterranean_snapdragon, Antirrhinum_majus\nkitten-tails\nAlpine_besseya, Besseya_alpina\nfalse_foxglove, Aureolaria_pedicularia, Gerardia_pedicularia\nfalse_foxglove, Aureolaria_virginica, Gerardia_virginica\ncalceolaria, slipperwort\nIndian_paintbrush, painted_cup\ndesert_paintbrush, Castilleja_chromosa\ngiant_red_paintbrush, Castilleja_miniata\ngreat_plains_paintbrush, Castilleja_sessiliflora\nsulfur_paintbrush, Castilleja_sulphurea\nshellflower, shell-flower, turtlehead, snakehead, snake-head, Chelone_glabra\nmaiden_blue-eyed_Mary, Collinsia_parviflora\nblue-eyed_Mary, Collinsia_verna\nfoxglove, digitalis\ncommon_foxglove, 
fairy_bell, fingerflower, finger-flower, fingerroot, finger-root, Digitalis_purpurea\nyellow_foxglove, straw_foxglove, Digitalis_lutea\ngerardia\nblue_toadflax, old-field_toadflax, Linaria_canadensis\ntoadflax, butter-and-eggs, wild_snapdragon, devil's_flax, Linaria_vulgaris\ngolden-beard_penstemon, Penstemon_barbatus\nscarlet_bugler, Penstemon_centranthifolius\nred_shrubby_penstemon, redwood_penstemon\nPlatte_River_penstemon, Penstemon_cyananthus\nhot-rock_penstemon, Penstemon_deustus\nJones'_penstemon, Penstemon_dolius\nshrubby_penstemon, lowbush_penstemon, Penstemon_fruticosus\nnarrow-leaf_penstemon, Penstemon_linarioides\nballoon_flower, scented_penstemon, Penstemon_palmeri\nParry's_penstemon, Penstemon_parryi\nrock_penstemon, cliff_penstemon, Penstemon_rupicola\nRydberg's_penstemon, Penstemon_rydbergii\ncascade_penstemon, Penstemon_serrulatus\nWhipple's_penstemon, Penstemon_whippleanus\nmoth_mullein, Verbascum_blattaria\nwhite_mullein, Verbascum_lychnitis\npurple_mullein, Verbascum_phoeniceum\ncommon_mullein, great_mullein, Aaron's_rod, flannel_mullein, woolly_mullein, torch, Verbascum_thapsus\nveronica, speedwell\nfield_speedwell, Veronica_agrestis\nbrooklime, American_brooklime, Veronica_americana\ncorn_speedwell, Veronica_arvensis\nbrooklime, European_brooklime, Veronica_beccabunga\ngermander_speedwell, bird's_eye, Veronica_chamaedrys\nwater_speedwell, Veronica_michauxii, Veronica_anagallis-aquatica\ncommon_speedwell, gypsyweed, Veronica_officinalis\npurslane_speedwell, Veronica_peregrina\nthyme-leaved_speedwell, Veronica_serpyllifolia\nnightshade\nhorse_nettle, ball_nettle, bull_nettle, ball_nightshade, Solanum_carolinense\nAfrican_holly, Solanum_giganteum\npotato_vine, Solanum_jasmoides\ngarden_huckleberry, wonderberry, sunberry, Solanum_nigrum_guineese, Solanum_melanocerasum, Solanum_burbankii\nnaranjilla, Solanum_quitoense\npotato_vine, giant_potato_creeper, Solanum_wendlandii\npotato_tree, Brazilian_potato_tree, Solanum_wrightii, 
Solanum_macranthum\nbelladonna, belladonna_plant, deadly_nightshade, Atropa_belladonna\nbush_violet, browallia\nlady-of-the-night, Brunfelsia_americana\nangel's_trumpet, maikoa, Brugmansia_arborea, Datura_arborea\nangel's_trumpet, Brugmansia_suaveolens, Datura_suaveolens\nred_angel's_trumpet, Brugmansia_sanguinea, Datura_sanguinea\ncone_pepper, Capsicum_annuum_conoides\nbird_pepper, Capsicum_frutescens_baccatum, Capsicum_baccatum\nday_jessamine, Cestrum_diurnum\nnight_jasmine, night_jessamine, Cestrum_nocturnum\ntree_tomato, tamarillo\nthorn_apple\njimsonweed, jimson_weed, Jamestown_weed, common_thorn_apple, apple_of_Peru, Datura_stramonium\npichi, Fabiana_imbricata\nhenbane, black_henbane, stinking_nightshade, Hyoscyamus_niger\nEgyptian_henbane, Hyoscyamus_muticus\nmatrimony_vine, boxthorn\ncommon_matrimony_vine, Duke_of_Argyll's_tea_tree, Lycium_barbarum, Lycium_halimifolium\nChristmasberry, Christmas_berry, Lycium_carolinianum\nplum_tomato\nmandrake, devil's_apples, Mandragora_officinarum\nmandrake_root, mandrake\napple_of_Peru, shoo_fly, Nicandra_physaloides\nflowering_tobacco, Jasmine_tobacco, Nicotiana_alata\ncommon_tobacco, Nicotiana_tabacum\nwild_tobacco, Indian_tobacco, Nicotiana_rustica\ncupflower, nierembergia\nwhitecup, Nierembergia_repens, Nierembergia_rivularis\npetunia\nlarge_white_petunia, Petunia_axillaris\nviolet-flowered_petunia, Petunia_integrifolia\nhybrid_petunia, Petunia_hybrida\ncape_gooseberry, purple_ground_cherry, Physalis_peruviana\nstrawberry_tomato, dwarf_cape_gooseberry, Physalis_pruinosa\ntomatillo, jamberry, Mexican_husk_tomato, Physalis_ixocarpa\ntomatillo, miltomate, purple_ground_cherry, jamberry, Physalis_philadelphica\nyellow_henbane, Physalis_viscosa\ncock's_eggs, Salpichroa_organifolia, Salpichroa_rhomboidea\nsalpiglossis\npainted_tongue, Salpiglossis_sinuata\nbutterfly_flower, poor_man's_orchid, schizanthus\nScopolia_carniolica\nchalice_vine, trumpet_flower, cupflower, Solandra_guttata\nverbena, 
vervain\nlantana\nblack_mangrove, Avicennia_marina\nwhite_mangrove, Avicennia_officinalis\nblack_mangrove, Aegiceras_majus\nteak, Tectona_grandis\nspurge\nsun_spurge, wartweed, wartwort, devil's_milk, Euphorbia_helioscopia\npetty_spurge, devil's_milk, Euphorbia_peplus\nmedusa's_head, Euphorbia_medusae, Euphorbia_caput-medusae\nwild_spurge, flowering_spurge, tramp's_spurge, Euphorbia_corollata\nsnow-on-the-mountain, snow-in-summer, ghost_weed, Euphorbia_marginata\ncypress_spurge, Euphorbia_cyparissias\nleafy_spurge, wolf's_milk, Euphorbia_esula\nhairy_spurge, Euphorbia_hirsuta\npoinsettia, Christmas_star, Christmas_flower, lobster_plant, Mexican_flameleaf, painted_leaf, Euphorbia_pulcherrima\nJapanese_poinsettia, mole_plant, paint_leaf, Euphorbia_heterophylla\nfire-on-the-mountain, painted_leaf, Mexican_fire_plant, Euphorbia_cyathophora\nwood_spurge, Euphorbia_amygdaloides\ndwarf_spurge, Euphorbia_exigua\nscarlet_plume, Euphorbia_fulgens\nnaboom, cactus_euphorbia, Euphorbia_ingens\ncrown_of_thorns, Christ_thorn, Christ_plant, Euphorbia_milii\ntoothed_spurge, Euphorbia_dentata\nthree-seeded_mercury, Acalypha_virginica\ncroton, Croton_tiglium\ncascarilla, Croton_eluteria\ncascarilla_bark, eleuthera_bark, sweetwood_bark\ncastor-oil_plant, castor_bean_plant, palma_christi, palma_christ, Ricinus_communis\nspurge_nettle, tread-softly, devil_nettle, pica-pica, Cnidoscolus_urens, Jatropha_urens, Jatropha_stimulosus\nphysic_nut, Jatropha_curcus\nPara_rubber_tree, caoutchouc_tree, Hevea_brasiliensis\ncassava, casava\nbitter_cassava, manioc, mandioc, mandioca, tapioca_plant, gari, Manihot_esculenta, Manihot_utilissima\ncassava, manioc\nsweet_cassava, Manihot_dulcis\ncandlenut, varnish_tree, Aleurites_moluccana\ntung_tree, tung, tung-oil_tree, Aleurites_fordii\nslipper_spurge, slipper_plant\ncandelilla, Pedilanthus_bracteatus, Pedilanthus_pavonis\nJewbush, Jew-bush, Jew_bush, redbird_cactus, redbird_flower, Pedilanthus_tithymaloides\njumping_bean, jumping_seed, 
Mexican_jumping_bean\ncamellia, camelia\njaponica, Camellia_japonica\numbellifer, umbelliferous_plant\nwild_parsley\nfool's_parsley, lesser_hemlock, Aethusa_cynapium\ndill, Anethum_graveolens\nangelica, angelique\ngarden_angelica, archangel, Angelica_Archangelica\nwild_angelica, Angelica_sylvestris\nchervil, beaked_parsley, Anthriscus_cereifolium\ncow_parsley, wild_chervil, Anthriscus_sylvestris\nwild_celery, Apium_graveolens\nastrantia, masterwort\ngreater_masterwort, Astrantia_major\ncaraway, Carum_carvi\nwhorled_caraway\nwater_hemlock, Cicuta_verosa\nspotted_cowbane, spotted_hemlock, spotted_water_hemlock\nhemlock, poison_hemlock, poison_parsley, California_fern, Nebraska_fern, winter_fern, Conium_maculatum\nearthnut, Conopodium_denudatum\ncumin, Cuminum_cyminum\nwild_carrot, Queen_Anne's_lace, Daucus_carota\neryngo, eringo\nsea_holly, sea_holm, sea_eryngium, Eryngium_maritimum\nbutton_snakeroot, Eryngium_aquaticum\nrattlesnake_master, rattlesnake's_master, button_snakeroot, Eryngium_yuccifolium\nfennel\ncommon_fennel, Foeniculum_vulgare\nFlorence_fennel, Foeniculum_dulce, Foeniculum_vulgare_dulce\ncow_parsnip, hogweed, Heracleum_sphondylium\nlovage, Levisticum_officinale\nsweet_cicely, Myrrhis_odorata\nwater_fennel, Oenanthe_aquatica\nparsnip, Pastinaca_sativa\ncultivated_parsnip\nwild_parsnip, madnep\nparsley, Petroselinum_crispum\nItalian_parsley, flat-leaf_parsley, Petroselinum_crispum_neapolitanum\nHamburg_parsley, turnip-rooted_parsley, Petroselinum_crispum_tuberosum\nanise, anise_plant, Pimpinella_anisum\nsanicle, snakeroot\npurple_sanicle, Sanicula_bipinnatifida\nEuropean_sanicle, Sanicula_Europaea\nwater_parsnip, Sium_suave\ngreater_water_parsnip, Sium_latifolium\nskirret, Sium_sisarum\ndogwood, dogwood_tree, cornel\ncommon_white_dogwood, eastern_flowering_dogwood, Cornus_florida\nred_osier, red_osier_dogwood, red_dogwood, American_dogwood, redbrush, Cornus_stolonifera\nsilky_dogwood, Cornus_obliqua\nsilky_cornel, silky_dogwood, 
Cornus_amomum\ncommon_European_dogwood, red_dogwood, blood-twig, pedwood, Cornus_sanguinea\nbunchberry, dwarf_cornel, crackerberry, pudding_berry, Cornus_canadensis\ncornelian_cherry, Cornus_mas\npuka, Griselinia_lucida\nkapuka, Griselinia_littoralis\nvalerian\ncommon_valerian, garden_heliotrope, Valeriana_officinalis\ncommon_corn_salad, lamb's_lettuce, Valerianella_olitoria, Valerianella_locusta\nred_valerian, French_honeysuckle, Centranthus_ruber\nfilmy_fern, film_fern\nbristle_fern, filmy_fern\nhare's-foot_bristle_fern, Trichomanes_boschianum\nKillarney_fern, Trichomanes_speciosum\nkidney_fern, Trichomanes_reniforme\nflowering_fern, osmund\nroyal_fern, royal_osmund, king_fern, ditch_fern, French_bracken, Osmunda_regalis\ninterrupted_fern, Osmunda_clatonia\ncrape_fern, Prince-of-Wales_fern, Prince-of-Wales_feather, Prince-of-Wales_plume, Leptopteris_superba, Todea_superba\ncrepe_fern, king_fern, Todea_barbara\ncurly_grass, curly_grass_fern, Schizaea_pusilla\npine_fern, Anemia_adiantifolia\nclimbing_fern\ncreeping_fern, Hartford_fern, Lygodium_palmatum\nclimbing_maidenhair, climbing_maidenhair_fern, snake_fern, Lygodium_microphyllum\nscented_fern, Mohria_caffrorum\nclover_fern, pepperwort\nnardoo, nardo, common_nardoo, Marsilea_drummondii\nwater_clover, Marsilea_quadrifolia\npillwort, Pilularia_globulifera\nregnellidium, Regnellidium_diphyllum\nfloating-moss, Salvinia_rotundifolia, Salvinia_auriculata\nmosquito_fern, floating_fern, Carolina_pond_fern, Azolla_caroliniana\nadder's_tongue, adder's_tongue_fern\nribbon_fern, Ophioglossum_pendulum\ngrape_fern\ndaisyleaf_grape_fern, daisy-leaved_grape_fern, Botrychium_matricariifolium\nleathery_grape_fern, Botrychium_multifidum\nrattlesnake_fern, Botrychium_virginianum\nflowering_fern, Helminthostachys_zeylanica\npowdery_mildew\nDutch_elm_fungus, Ceratostomella_ulmi\nergot, Claviceps_purpurea\nrye_ergot\nblack_root_rot_fungus, Xylaria_mali\ndead-man's-fingers, dead-men's-fingers, 
Xylaria_polymorpha\nsclerotinia\nbrown_cup\nearthball, false_truffle, puffball, hard-skinned_puffball\nScleroderma_citrinum, Scleroderma_aurantium\nScleroderma_flavidium, star_earthball\nScleroderma_bovista, smooth_earthball\nPodaxaceae\nstalked_puffball\nstalked_puffball\nfalse_truffle\nRhizopogon_idahoensis\nTruncocolumella_citrina\nmucor\nrhizopus\nbread_mold, Rhizopus_nigricans\nslime_mold, slime_mould\ntrue_slime_mold, acellular_slime_mold, plasmodial_slime_mold, myxomycete\ncellular_slime_mold\ndictostylium\npond-scum_parasite\npotato_wart_fungus, Synchytrium_endobioticum\nwhite_fungus, Saprolegnia_ferax\nwater_mold\ndowny_mildew, false_mildew\nblue_mold_fungus, Peronospora_tabacina\nonion_mildew, Peronospora_destructor\ntobacco_mildew, Peronospora_hyoscyami\nwhite_rust\npythium\ndamping_off_fungus, Pythium_debaryanum\nPhytophthora_citrophthora\nPhytophthora_infestans\nclubroot_fungus, Plasmodiophora_brassicae\nGeglossaceae\nSarcosomataceae\nRufous_rubber_cup\ndevil's_cigar\ndevil's_urn\ntruffle, earthnut, earth-ball\nclub_fungus\ncoral_fungus\ntooth_fungus\nlichen\nascolichen\nbasidiolichen\nlecanora\nmanna_lichen\narchil, orchil\nroccella, Roccella_tinctoria\nbeard_lichen, beard_moss, Usnea_barbata\nhorsehair_lichen, horsetail_lichen\nreindeer_moss, reindeer_lichen, arctic_moss, Cladonia_rangiferina\ncrottle, crottal, crotal\nIceland_moss, Iceland_lichen, Cetraria_islandica\nfungus\npromycelium\ntrue_fungus\nbasidiomycete, basidiomycetous_fungi\nmushroom\nagaric\nmushroom\nmushroom\ntoadstool\nhorse_mushroom, Agaricus_arvensis\nmeadow_mushroom, field_mushroom, Agaricus_campestris\nshiitake, shiitake_mushroom, Chinese_black_mushroom, golden_oak_mushroom, Oriental_black_mushroom, Lentinus_edodes\nscaly_lentinus, Lentinus_lepideus\nroyal_agaric, Caesar's_agaric, Amanita_caesarea\nfalse_deathcap, Amanita_mappa\nfly_agaric, Amanita_muscaria\ndeath_cap, death_cup, death_angel, destroying_angel, Amanita_phalloides\nblushing_mushroom, blusher, 
Amanita_rubescens\ndestroying_angel, Amanita_verna\nchanterelle, chantarelle, Cantharellus_cibarius\nfloccose_chanterelle, Cantharellus_floccosus\npig's_ears, Cantharellus_clavatus\ncinnabar_chanterelle, Cantharellus_cinnabarinus\njack-o-lantern_fungus, jack-o-lantern, jack-a-lantern, Omphalotus_illudens\ninky_cap, inky-cap_mushroom, Coprinus_atramentarius\nshaggymane, shaggy_cap, shaggymane_mushroom, Coprinus_comatus\nmilkcap, Lactarius_delicioso\nfairy-ring_mushroom, Marasmius_oreades\nfairy_ring, fairy_circle\noyster_mushroom, oyster_fungus, oyster_agaric, Pleurotus_ostreatus\nolive-tree_agaric, Pleurotus_phosphoreus\nPholiota_astragalina\nPholiota_aurea, golden_pholiota\nPholiota_destruens\nPholiota_flammans\nPholiota_flavida\nnameko, viscid_mushroom, Pholiota_nameko\nPholiota_squarrosa-adiposa\nPholiota_squarrosa, scaly_pholiota\nPholiota_squarrosoides\nStropharia_ambigua\nStropharia_hornemannii\nStropharia_rugoso-annulata\ngill_fungus\nEntoloma_lividum, Entoloma_sinuatum\nEntoloma_aprile\nChlorophyllum_molybdites\nlepiota\nparasol_mushroom, Lepiota_procera\npoisonous_parasol, Lepiota_morgani\nLepiota_naucina\nLepiota_rhacodes\nAmerican_parasol, Lepiota_americana\nLepiota_rubrotincta\nLepiota_clypeolaria\nonion_stem, Lepiota_cepaestipes\npink_disease_fungus, Corticium_salmonicolor\nbottom_rot_fungus, Corticium_solani\npotato_fungus, Pellicularia_filamentosa, Rhizoctinia_solani\ncoffee_fungus, Pellicularia_koleroga\nblewits, Clitocybe_nuda\nsandy_mushroom, Tricholoma_populinum\nTricholoma_pessundatum\nTricholoma_sejunctum\nman-on-a-horse, Tricholoma_flavovirens\nTricholoma_venenata\nTricholoma_pardinum\nTricholoma_vaccinum\nTricholoma_aurantium\nVolvaria_bombycina\nPluteus_aurantiorugosus\nPluteus_magnus, sawdust_mushroom\ndeer_mushroom, Pluteus_cervinus\nstraw_mushroom, Chinese_mushroom, Volvariella_volvacea\nVolvariella_bombycina\nClitocybe_clavipes\nClitocybe_dealbata\nClitocybe_inornata\nClitocybe_robusta, Clytocybe_alba\nClitocybe_irina, Tricholoma_irinum, 
Lepista_irina\nClitocybe_subconnexa\nwinter_mushroom, Flammulina_velutipes\nmycelium\nsclerotium\nsac_fungus\nascomycete, ascomycetous_fungus\nClavicipitaceae, grainy_club_mushrooms\ngrainy_club\nyeast\nbaker's_yeast, brewer's_yeast, Saccharomyces_cerevisiae\nwine-maker's_yeast, Saccharomyces_ellipsoides\nAspergillus_fumigatus\nbrown_root_rot_fungus, Thielavia_basicola\ndiscomycete, cup_fungus\nLeotia_lubrica\nMitrula_elegans\nSarcoscypha_coccinea, scarlet_cup\nCaloscypha_fulgens\nAleuria_aurantia, orange_peel_fungus\nelf_cup\nPeziza_domicilina\nblood_cup, fairy_cup, Peziza_coccinea\nUrnula_craterium, urn_fungus\nGaliella_rufa\nJafnea_semitosta\nmorel\ncommon_morel, Morchella_esculenta, sponge_mushroom, sponge_morel\nDisciotis_venosa, cup_morel\nVerpa, bell_morel\nVerpa_bohemica, early_morel\nVerpa_conica, conic_Verpa\nblack_morel, Morchella_conica, conic_morel, Morchella_angusticeps, narrowhead_morel\nMorchella_crassipes, thick-footed_morel\nMorchella_semilibera, half-free_morel, cow's_head\nWynnea_americana\nWynnea_sparassoides\nfalse_morel\nlorchel\nhelvella\nHelvella_crispa, miter_mushroom\nHelvella_acetabulum\nHelvella_sulcata\ndiscina\ngyromitra\nGyromitra_californica, California_false_morel\nGyromitra_sphaerospora, round-spored_gyromitra\nGyromitra_esculenta, brain_mushroom, beefsteak_morel\nGyromitra_infula, saddled-shaped_false_morel\nGyromitra_fastigiata, Gyromitra_brunnea\nGyromitra_gigas\ngasteromycete, gastromycete\nstinkhorn, carrion_fungus\ncommon_stinkhorn, Phallus_impudicus\nPhallus_ravenelii\ndog_stinkhorn, Mutinus_caninus\nCalostoma_lutescens\nCalostoma_cinnabarina\nCalostoma_ravenelii\nstinky_squid, Pseudocolus_fusiformis\npuffball, true_puffball\ngiant_puffball, Calvatia_gigantea\nearthstar\nGeastrum_coronatum\nRadiigera_fuscogleba\nAstreus_pteridis\nAstreus_hygrometricus\nbird's-nest_fungus\nGastrocybe_lateritia\nMacowanites_americanus\npolypore, pore_fungus, pore_mushroom\nbracket_fungus, 
shelf_fungus\nAlbatrellus_dispansus\nAlbatrellus_ovinus, sheep_polypore\nNeolentinus_ponderosus\nOligoporus_leucospongia\nPolyporus_tenuiculus\nhen-of-the-woods, hen_of_the_woods, Polyporus_frondosus, Grifola_frondosa\nPolyporus_squamosus, scaly_polypore\nbeefsteak_fungus, Fistulina_hepatica\nagaric, Fomes_igniarius\nbolete\nBoletus_chrysenteron\nBoletus_edulis\nFrost's_bolete, Boletus_frostii\nBoletus_luridus\nBoletus_mirabilis\nBoletus_pallidus\nBoletus_pulcherrimus\nBoletus_pulverulentus\nBoletus_roxanae\nBoletus_subvelutipes\nBoletus_variipes\nBoletus_zelleri\nFuscoboletinus_paluster\nFuscoboletinus_serotinus\nLeccinum_fibrillosum\nSuillus_albivelatus\nold-man-of-the-woods, Strobilomyces_floccopus\nBoletellus_russellii\njelly_fungus\nsnow_mushroom, Tremella_fuciformis\nwitches'_butter, Tremella_lutescens\nTremella_foliacea\nTremella_reticulata\nJew's-ear, Jew's-ears, ear_fungus, Auricularia_auricula\nrust, rust_fungus\naecium\nflax_rust, flax_rust_fungus, Melampsora_lini\nblister_rust, Cronartium_ribicola\nwheat_rust, Puccinia_graminis\napple_rust, cedar-apple_rust, Gymnosporangium_juniperi-virginianae\nsmut, smut_fungus\ncovered_smut\nloose_smut\ncornsmut, corn_smut\nboil_smut, Ustilago_maydis\nSphacelotheca, genus_Sphacelotheca\nhead_smut, Sphacelotheca_reiliana\nbunt, Tilletia_caries\nbunt, stinking_smut, Tilletia_foetida\nonion_smut, Urocystis_cepulae\nflag_smut_fungus\nwheat_flag_smut, Urocystis_tritici\nfelt_fungus, Septobasidium_pseudopedicellatum\nwaxycap\nHygrocybe_acutoconica, conic_waxycap\nHygrophorus_borealis\nHygrophorus_caeruleus\nHygrophorus_inocybiformis\nHygrophorus_kauffmanii\nHygrophorus_marzuolus\nHygrophorus_purpurascens\nHygrophorus_russula\nHygrophorus_sordidus\nHygrophorus_tennesseensis\nHygrophorus_turundus\nNeohygrophorus_angelesianus\nCortinarius_armillatus\nCortinarius_atkinsonianus\nCortinarius_corrugatus\nCortinarius_gentilis\nCortinarius_mutabilis, 
purple-staining_Cortinarius\nCortinarius_semisanguineus\nCortinarius_subfoetidus\nCortinarius_violaceus\nGymnopilus_spectabilis\nGymnopilus_validipes\nGymnopilus_ventricosus\nmold, mould\nmildew\nverticillium\nmonilia\ncandida\nCandida_albicans, Monilia_albicans\nblastomycete\nyellow_spot_fungus, Cercospora_kopkei\ngreen_smut_fungus, Ustilaginoidea_virens\ndry_rot\nrhizoctinia\nhouseplant\nbedder, bedding_plant\nsucculent\ncultivar\nweed\nwort\nbrier\naril\nsporophyll, sporophyl\nsporangium, spore_case, spore_sac\nsporangiophore\nascus\nascospore\narthrospore\neusporangium\ntetrasporangium\ngametangium\nsorus\nsorus\npartial_veil\nlignum\nvascular_ray, medullary_ray\nphloem, bast\nevergreen, evergreen_plant\ndeciduous_plant\npoisonous_plant\nvine\ncreeper\ntendril\nroot_climber\nlignosae\narborescent_plant\nsnag\ntree\ntimber_tree\ntreelet\narbor\nbean_tree\npollard\nsapling\nshade_tree\ngymnospermous_tree\nconifer, coniferous_tree\nangiospermous_tree, flowering_tree\nnut_tree\nspice_tree\nfever_tree\nstump, tree_stump\nbonsai\nming_tree\nming_tree\nundershrub\nsubshrub, suffrutex\nbramble\nliana\ngeophyte\ndesert_plant, xerophyte, xerophytic_plant, xerophile, xerophilous_plant\nmesophyte, mesophytic_plant\nmarsh_plant, bog_plant, swamp_plant\nhemiepiphyte, semiepiphyte\nstrangler, strangler_tree\nlithophyte, lithophytic_plant\nsaprobe\nautophyte, autophytic_plant, autotroph, autotrophic_organism\nroot\ntaproot\nprop_root\nprophyll\nrootstock\nquickset\nstolon, runner, offset\ntuberous_plant\nrhizome, rootstock, rootstalk\nrachis\ncaudex\ncladode, cladophyll, phylloclad, phylloclade\nreceptacle\nscape, flower_stalk\numbel\npetiole, leafstalk\npeduncle\npedicel, pedicle\nflower_cluster\nraceme\npanicle\nthyrse, thyrsus\ncyme\ncymule\nglomerule\nscorpioid_cyme\near, spike, capitulum\nspadix\nbulbous_plant\nbulbil, bulblet\ncormous_plant\nfruit\nfruitlet\nseed\nbean\nnut\nnutlet\nkernel, meat\nsyconium\nberry\naggregate_fruit, multiple_fruit, syncarp\nsimple_fruit, 
bacca\nacinus\ndrupe, stone_fruit\ndrupelet\npome, false_fruit\npod, seedpod\nloment\npyxidium, pyxis\nhusk\ncornhusk\npod, cod, seedcase\naccessory_fruit, pseudocarp\nbuckthorn\nbuckthorn_berry, yellow_berry\ncascara_buckthorn, bearberry, bearwood, chittamwood, chittimwood, Rhamnus_purshianus\ncascara, cascara_sagrada, chittam_bark, chittem_bark\nCarolina_buckthorn, indian_cherry, Rhamnus_carolinianus\ncoffeeberry, California_buckthorn, California_coffee, Rhamnus_californicus\nredberry, red-berry, Rhamnus_croceus\nnakedwood\njujube, jujube_bush, Christ's-thorn, Jerusalem_thorn, Ziziphus_jujuba\nChrist's-thorn, Jerusalem_thorn, Paliurus_spina-christi\nhazel, hazel_tree, Pomaderris_apetala\nfox_grape, Vitis_labrusca\nmuscadine, Vitis_rotundifolia\nvinifera, vinifera_grape, common_grape_vine, Vitis_vinifera\nPinot_blanc\nSauvignon_grape\nSauvignon_blanc\nMuscadet\nRiesling\nZinfandel\nChenin_blanc\nmalvasia\nVerdicchio\nBoston_ivy, Japanese_ivy, Parthenocissus_tricuspidata\nVirginia_creeper, American_ivy, woodbine, Parthenocissus_quinquefolia\ntrue_pepper, pepper_vine\nbetel, betel_pepper, Piper_betel\ncubeb\nschizocarp\npeperomia\nwatermelon_begonia, Peperomia_argyreia, Peperomia_sandersii\nyerba_mansa, Anemopsis_californica\npinna, pinnule\nfrond\nbract\nbracteole, bractlet\ninvolucre\nglume\npalmate_leaf\npinnate_leaf\nbijugate_leaf, bijugous_leaf, twice-pinnate\ndecompound_leaf\nacuminate_leaf\ndeltoid_leaf\nensiform_leaf\nlinear_leaf, elongate_leaf\nlyrate_leaf\nobtuse_leaf\noblanceolate_leaf\npandurate_leaf, panduriform_leaf\nreniform_leaf\nspatulate_leaf\neven-pinnate_leaf, abruptly-pinnate_leaf\nodd-pinnate_leaf\npedate_leaf\ncrenate_leaf\ndentate_leaf\ndenticulate_leaf\nerose_leaf\nruncinate_leaf\nprickly-edged_leaf\ndeadwood\nhaulm, halm\nbranchlet, twig, sprig\nosier\ngiant_scrambling_fern, Diplopterygium_longissimum\numbrella_fern, fan_fern, Sticherus_flabellatus, Gleichenia_flabellata\nfloating_fern, water_sprite, 
Ceratopteris_pteridioides\npolypody\nlicorice_fern, Polypodium_glycyrrhiza\ngrey_polypody, gray_polypody, resurrection_fern, Polypodium_polypodioides\nleatherleaf, leathery_polypody, coast_polypody, Polypodium_scouleri\nrock_polypody, rock_brake, American_wall_fern, Polypodium_virgianum\ncommon_polypody, adder's_fern, wall_fern, golden_maidenhair, golden_polypody, sweet_fern, Polypodium_vulgare\nbear's-paw_fern, Aglaomorpha_meyeniana\nstrap_fern\nFlorida_strap_fern, cow-tongue_fern, hart's-tongue_fern\nbasket_fern, Drynaria_rigidula\nsnake_polypody, Microgramma-piloselloides\nclimbing_bird's_nest_fern, Microsorium_punctatum\ngolden_polypody, serpent_fern, rabbit's-foot_fern, Phlebodium_aureum, Polypodium_aureum\nstaghorn_fern\nSouth_American_staghorn, Platycerium_andinum\ncommon_staghorn_fern, elkhorn_fern, Platycerium_bifurcatum, Platycerium_alcicorne\nfelt_fern, tongue_fern, Pyrrosia_lingua, Cyclophorus_lingua\npotato_fern, Solanopteris_bifrons\nmyrmecophyte\ngrass_fern, ribbon_fern, Vittaria_lineata\nspleenwort\nblack_spleenwort, Asplenium_adiantum-nigrum\nbird's_nest_fern, Asplenium_nidus\nebony_spleenwort, Scott's_Spleenwort, Asplenium_platyneuron\nblack-stem_spleenwort, black-stemmed_spleenwort, little_ebony_spleenwort\nwalking_fern, walking_leaf, Asplenium_rhizophyllum, Camptosorus_rhizophyllus\ngreen_spleenwort, Asplenium_viride\nmountain_spleenwort, Asplenium_montanum\nlobed_spleenwort, Asplenium_pinnatifidum\nlanceolate_spleenwort, Asplenium_billotii\nhart's-tongue, hart's-tongue_fern, Asplenium_scolopendrium, Phyllitis_scolopendrium\nscale_fern, scaly_fern, Asplenium_ceterach, Ceterach_officinarum\nscolopendrium\ndeer_fern, Blechnum_spicant\ndoodia, rasp_fern\nchain_fern\nVirginia_chain_fern, Woodwardia_virginica\nsilver_tree_fern, sago_fern, black_tree_fern, Cyathea_medullaris\ndavallia\nhare's-foot_fern\nCanary_Island_hare's_foot_fern, Davallia_canariensis\nsquirrel's-foot_fern, ball_fern, Davalia_bullata, Davalia_bullata_mariesii, 
Davallia_Mariesii\nbracken, Pteridium_esculentum\nsoft_tree_fern, Dicksonia_antarctica\nScythian_lamb, Cibotium_barometz\nfalse_bracken, Culcita_dubia\nthyrsopteris, Thyrsopteris_elegans\nshield_fern, buckler_fern\nbroad_buckler-fern, Dryopteris_dilatata\nfragrant_cliff_fern, fragrant_shield_fern, fragrant_wood_fern, Dryopteris_fragrans\nGoldie's_fern, Goldie's_shield_fern, goldie's_wood_fern, Dryopteris_goldiana\nwood_fern, wood-fern, woodfern\nmale_fern, Dryopteris_filix-mas\nmarginal_wood_fern, evergreen_wood_fern, leatherleaf_wood_fern, Dryopteris_marginalis\nmountain_male_fern, Dryopteris_oreades\nlady_fern, Athyrium_filix-femina\nAlpine_lady_fern, Athyrium_distentifolium\nsilvery_spleenwort, glade_fern, narrow-leaved_spleenwort, Athyrium_pycnocarpon, Diplazium_pycnocarpon\nholly_fern, Cyrtomium_aculeatum, Polystichum_aculeatum\nbladder_fern\nbrittle_bladder_fern, brittle_fern, fragile_fern, Cystopteris_fragilis\nmountain_bladder_fern, Cystopteris_montana\nbulblet_fern, bulblet_bladder_fern, berry_fern, Cystopteris_bulbifera\nsilvery_spleenwort, Deparia_acrostichoides, Athyrium_thelypteroides\noak_fern, Gymnocarpium_dryopteris, Thelypteris_dryopteris\nlimestone_fern, northern_oak_fern, Gymnocarpium_robertianum\nostrich_fern, shuttlecock_fern, fiddlehead, Matteuccia_struthiopteris, Pteretis_struthiopteris, Onoclea_struthiopteris\nhart's-tongue, hart's-tongue_fern, Olfersia_cervina, Polybotrya_cervina, Polybotria_cervina\nsensitive_fern, bead_fern, Onoclea_sensibilis\nChristmas_fern, canker_brake, dagger_fern, evergreen_wood_fern, Polystichum_acrostichoides\nholly_fern\nBraun's_holly_fern, prickly_shield_fern, Polystichum_braunii\nwestern_holly_fern, Polystichum_scopulinum\nsoft_shield_fern, Polystichum_setiferum\nleather_fern, leatherleaf_fern, ten-day_fern, Rumohra_adiantiformis, Polystichum_adiantiformis\nbutton_fern, Tectaria_cicutaria\nIndian_button_fern, Tectaria_macrodonta\nwoodsia\nrusty_woodsia, fragrant_woodsia, oblong_woodsia, 
Woodsia_ilvensis\nAlpine_woodsia, northern_woodsia, flower-cup_fern, Woodsia_alpina\nsmooth_woodsia, Woodsia_glabella\nBoston_fern, Nephrolepis_exaltata, Nephrolepis_exaltata_bostoniensis\nbasket_fern, toothed_sword_fern, Nephrolepis_pectinata\ngolden_fern, leather_fern, Acrostichum_aureum\nmaidenhair, maidenhair_fern\ncommon_maidenhair, Venushair, Venus'-hair_fern, southern_maidenhair, Venus_maidenhair, Adiantum_capillus-veneris\nAmerican_maidenhair_fern, five-fingered_maidenhair_fern, Adiantum_pedatum\nBermuda_maidenhair, Bermuda_maidenhair_fern, Adiantum_bellum\nbrittle_maidenhair, brittle_maidenhair_fern, Adiantum_tenerum\nFarley_maidenhair, Farley_maidenhair_fern, Barbados_maidenhair, glory_fern, Adiantum_tenerum_farleyense\nannual_fern, Jersey_fern, Anogramma_leptophylla\nlip_fern, lipfern\nsmooth_lip_fern, Alabama_lip_fern, Cheilanthes_alabamensis\nlace_fern, Cheilanthes_gracillima\nwooly_lip_fern, hairy_lip_fern, Cheilanthes_lanosa\nsouthwestern_lip_fern, Cheilanthes_eatonii\nbamboo_fern, Coniogramme_japonica\nAmerican_rock_brake, American_parsley_fern, Cryptogramma_acrostichoides\nEuropean_parsley_fern, mountain_parsley_fern, Cryptogramma_crispa\nhand_fern, Doryopteris_pedata\ncliff_brake, cliff-brake, rock_brake\ncoffee_fern, Pellaea_andromedifolia\npurple_rock_brake, Pellaea_atropurpurea\nbird's-foot_fern, Pellaea_mucronata, Pellaea_ornithopus\nbutton_fern, Pellaea_rotundifolia\nsilver_fern, Pityrogramma_argentea\ngolden_fern, Pityrogramma_calomelanos_aureoflava\ngold_fern, Pityrogramma_chrysophylla\nPteris_cretica\nspider_brake, spider_fern, Pteris_multifida\nribbon_fern, spider_fern, Pteris_serrulata\npotato_fern, Marattia_salicina\nangiopteris, giant_fern, Angiopteris_evecta\nskeleton_fork_fern, Psilotum_nudum\nhorsetail\ncommon_horsetail, field_horsetail, Equisetum_arvense\nswamp_horsetail, water_horsetail, Equisetum_fluviatile\nscouring_rush, rough_horsetail, Equisetum_hyemale, Equisetum_hyemale_robustum, Equisetum_robustum\nmarsh_horsetail, 
Equisetum_palustre\nwood_horsetail, Equisetum_Sylvaticum\nvariegated_horsetail, variegated_scouring_rush, Equisetum_variegatum\nclub_moss, club-moss, lycopod\nshining_clubmoss, Lycopodium_lucidulum\nalpine_clubmoss, Lycopodium_alpinum\nfir_clubmoss, mountain_clubmoss, little_clubmoss, Lycopodium_selago\nground_cedar, staghorn_moss, Lycopodium_complanatum\nground_fir, princess_pine, tree_clubmoss, Lycopodium_obscurum\nfoxtail_grass, Lycopodium_alopecuroides\nspikemoss, spike_moss, little_club_moss\nmeadow_spikemoss, basket_spikemoss, Selaginella_apoda\ndesert_selaginella, Selaginella_eremophila\nresurrection_plant, rose_of_Jericho, Selaginella_lepidophylla\nflorida_selaginella, Selaginella_eatonii\nquillwort\nearthtongue, earth-tongue\nsnuffbox_fern, meadow_fern, Thelypteris_palustris_pubescens, Dryopteris_thelypteris_pubescens\nchristella\nmountain_fern, Oreopteris_limbosperma, Dryopteris_oreopteris\nNew_York_fern, Parathelypteris_novae-boracensis, Dryopteris_noveboracensis\nMassachusetts_fern, Parathelypteris_simulata, Thelypteris_simulata\nbeech_fern\nbroad_beech_fern, southern_beech_fern, Phegopteris_hexagonoptera, Dryopteris_hexagonoptera, Thelypteris_hexagonoptera\nlong_beech_fern, narrow_beech_fern, northern_beech_fern, Phegopteris_connectilis, Dryopteris_phegopteris, Thelypteris_phegopteris\nshoestring_fungus\nArmillaria_caligata, booted_armillaria\nArmillaria_ponderosa, white_matsutake\nArmillaria_zelleri\nhoney_mushroom, honey_fungus, Armillariella_mellea\nmilkweed, silkweed\nwhite_milkweed, Asclepias_albicans\npoke_milkweed, Asclepias_exaltata\nswamp_milkweed, Asclepias_incarnata\nMead's_milkweed, Asclepias_meadii, Asclepia_meadii\npurple_silkweed, Asclepias_purpurascens\nshowy_milkweed, Asclepias_speciosa\npoison_milkweed, horsetail_milkweed, Asclepias_subverticillata\nbutterfly_weed, orange_milkweed, chigger_flower, chiggerflower, pleurisy_root, tuber_root, Indian_paintbrush, Asclepias_tuberosa\nwhorled_milkweed, Asclepias_verticillata\ncruel_plant, 
Araujia_sericofera\nwax_plant, Hoya_carnosa\nsilk_vine, Periploca_graeca\nstapelia, carrion_flower, starfish_flower\nStapelias_asterias\nstephanotis\nMadagascar_jasmine, waxflower, Stephanotis_floribunda\nnegro_vine, Vincetoxicum_hirsutum, Vincetoxicum_negrum\nzygospore\ntree_of_knowledge\norangery\npocketbook\nshit, dump\ncordage\nyard, pace\nextremum, peak\nleaf_shape, leaf_form\nequilateral\nfigure\npencil\nplane_figure, two-dimensional_figure\nsolid_figure, three-dimensional_figure\nline\nbulb\nconvex_shape, convexity\nconcave_shape, concavity, incurvation, incurvature\ncylinder\nround_shape\nheart\npolygon, polygonal_shape\nconvex_polygon\nconcave_polygon\nreentrant_polygon, reentering_polygon\namorphous_shape\nclosed_curve\nsimple_closed_curve, Jordan_curve\nS-shape\nwave, undulation\nextrados\nhook, crotchet\nenvelope\nbight\ndiameter\ncone, conoid, cone_shape\nfunnel, funnel_shape\noblong\ncircle\ncircle\nequator\nscallop, crenation, crenature, crenel, crenelle\nring, halo, annulus, doughnut, anchor_ring\nloop\nbight\nhelix, spiral\nelement_of_a_cone\nelement_of_a_cylinder\nellipse, oval\nquadrate\ntriangle, trigon, trilateral\nacute_triangle, acute-angled_triangle\nisosceles_triangle\nobtuse_triangle, obtuse-angled_triangle\nright_triangle, right-angled_triangle\nscalene_triangle\nparallel\ntrapezoid\nstar\npentagon\nhexagon\nheptagon\noctagon\nnonagon\ndecagon\nrhombus, rhomb, diamond\nspherical_polygon\nspherical_triangle\nconvex_polyhedron\nconcave_polyhedron\ncuboid\nquadrangular_prism\nbell, bell_shape, campana\nangular_distance\ntrue_anomaly\nspherical_angle\nangle_of_refraction\nacute_angle\ngroove, channel\nrut\nbulge, bump, hump, swelling, gibbosity, gibbousness, jut, prominence, protuberance, protrusion, extrusion, excrescence\nbelly\nbow, arc\ncrescent\nellipsoid\nhypotenuse\nbalance, equilibrium, equipoise, counterbalance\nconformation\nsymmetry, proportion\nspheroid, ellipsoid_of_revolution\nspherule\ntoroid\ncolumn, tower, pillar\nbarrel, 
drum\npipe, tube\npellet\nbolus\ndewdrop\nridge\nrim\ntaper\nboundary, edge, bound\nincisure, incisura\nnotch\nwrinkle, furrow, crease, crinkle, seam, line\ndermatoglyphic\nfrown_line\nline_of_life, life_line, lifeline\nline_of_heart, heart_line, love_line, mensal_line\ncrevice, cranny, crack, fissure, chap\ncleft\nroulette, line_roulette\nnode\ntree, tree_diagram\nstemma\nbrachium\nfork, crotch\nblock, cube\novoid\ntetrahedron\npentahedron\nhexahedron\nregular_polyhedron, regular_convex_solid, regular_convex_polyhedron, Platonic_body, Platonic_solid, ideal_solid\npolyhedral_angle\ncube, regular_hexahedron\ntruncated_pyramid\ntruncated_cone\ntail, tail_end\ntongue, knife\ntrapezohedron\nwedge, wedge_shape, cuneus\nkeel\nplace, shoes\nherpes\nchlamydia\nwall\nmicronutrient\nchyme\nragweed_pollen\npina_cloth\nchlorobenzylidenemalononitrile, CS_gas\ncarbon, C, atomic_number_6\ncharcoal, wood_coal\nrock, stone\ngravel, crushed_rock\naflatoxin\nalpha-tocopheral\nleopard\nbricks_and_mortar\nlagging\nhydraulic_cement, Portland_cement\ncholine\nconcrete\nglass_wool\nsoil, dirt\nhigh_explosive\nlitter\nfish_meal\nGreek_fire\nculture_medium, medium\nagar, nutrient_agar\nblood_agar\nhip_tile, hipped_tile\nhyacinth, jacinth\nhydroxide_ion, hydroxyl_ion\nice, water_ice\ninositol\nlinoleum, lino\nlithia_water\nlodestone, loadstone\npantothenic_acid, pantothen\npaper\npapyrus\npantile\nblacktop, blacktopping\ntarmacadam, tarmac\npaving, pavement, paving_material\nplaster\npoison_gas\nridge_tile\nroughcast\nsand\nspackle, spackling_compound\nrender\nwattle_and_daub\nstucco\ntear_gas, teargas, lacrimator, lachrymator\ntoilet_tissue, toilet_paper, bathroom_tissue\nlinseed, flaxseed\nvitamin\nfat-soluble_vitamin\nwater-soluble_vitamin\nvitamin_A, antiophthalmic_factor, axerophthol, A\nvitamin_A1, retinol\nvitamin_A2, dehydroretinol\nB-complex_vitamin, B_complex, vitamin_B_complex, vitamin_B, B_vitamin, B\nvitamin_B1, thiamine, thiamin, aneurin, antiberiberi_factor\nvitamin_B12, 
cobalamin, cyanocobalamin, antipernicious_anemia_factor\nvitamin_B2, vitamin_G, riboflavin, lactoflavin, ovoflavin, hepatoflavin\nvitamin_B6, pyridoxine, pyridoxal, pyridoxamine, adermin\nvitamin_Bc, vitamin_M, folate, folic_acid, folacin, pteroylglutamic_acid, pteroylmonoglutamic_acid\nniacin, nicotinic_acid\nvitamin_D, calciferol, viosterol, ergocalciferol, cholecalciferol, D\nvitamin_E, tocopherol, E\nbiotin, vitamin_H\nvitamin_K, naphthoquinone, antihemorrhagic_factor\nvitamin_K1, phylloquinone, phytonadione\nvitamin_K3, menadione\nvitamin_P, bioflavinoid, citrin\nvitamin_C, C, ascorbic_acid\nplanking\nchipboard, hardboard\nknothole\n"
  },
  {
    "path": "pytorch_classification/grad_cam/main_cnn.py",
    "content": "import os\nimport numpy as np\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nfrom torchvision import models\nfrom torchvision import transforms\nfrom utils import GradCAM, show_cam_on_image, center_crop_img\n\n\ndef main():\n    model = models.mobilenet_v3_large(pretrained=True)\n    target_layers = [model.features[-1]]\n\n    # model = models.vgg16(pretrained=True)\n    # target_layers = [model.features]\n\n    # model = models.resnet34(pretrained=True)\n    # target_layers = [model.layer4]\n\n    # model = models.regnet_y_800mf(pretrained=True)\n    # target_layers = [model.trunk_output]\n\n    # model = models.efficientnet_b0(pretrained=True)\n    # target_layers = [model.features]\n\n    data_transform = transforms.Compose([transforms.ToTensor(),\n                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n    # load image\n    img_path = \"both.png\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path).convert('RGB')\n    img = np.array(img, dtype=np.uint8)\n    # img = center_crop_img(img, 224)\n\n    # [C, H, W]\n    img_tensor = data_transform(img)\n    # expand batch dimension\n    # [C, H, W] -> [N, C, H, W]\n    input_tensor = torch.unsqueeze(img_tensor, dim=0)\n\n    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False)\n    target_category = 281  # tabby, tabby cat\n    # target_category = 254  # pug, pug-dog\n\n    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)\n\n    grayscale_cam = grayscale_cam[0, :]\n    visualization = show_cam_on_image(img.astype(dtype=np.float32) / 255.,\n                                      grayscale_cam,\n                                      use_rgb=True)\n    plt.imshow(visualization)\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/grad_cam/main_swin.py",
    "content": "import os\nimport math\nimport numpy as np\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nfrom torchvision import transforms\nfrom utils import GradCAM, show_cam_on_image, center_crop_img\nfrom swin_model import swin_base_patch4_window7_224\n\n\nclass ResizeTransform:\n    def __init__(self, im_h: int, im_w: int):\n        self.height = self.feature_size(im_h)\n        self.width = self.feature_size(im_w)\n\n    @staticmethod\n    def feature_size(s):\n        s = math.ceil(s / 4)  # PatchEmbed\n        s = math.ceil(s / 2)  # PatchMerging1\n        s = math.ceil(s / 2)  # PatchMerging2\n        s = math.ceil(s / 2)  # PatchMerging3\n        return s\n\n    def __call__(self, x):\n        result = x.reshape(x.size(0),\n                           self.height,\n                           self.width,\n                           x.size(2))\n\n        # Bring the channels to the first dimension,\n        # like in CNNs.\n        # [batch_size, H, W, C] -> [batch, C, H, W]\n        result = result.permute(0, 3, 1, 2)\n\n        return result\n\n\ndef main():\n    # 注意输入的图片必须是32的整数倍\n    # 否则由于padding的原因会出现注意力飘逸的问题\n    img_size = 224\n    assert img_size % 32 == 0\n\n    model = swin_base_patch4_window7_224()\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth\n    weights_path = \"./swin_base_patch4_window7_224.pth\"\n    model.load_state_dict(torch.load(weights_path, map_location=\"cpu\")[\"model\"], strict=False)\n\n    target_layers = [model.norm]\n\n    data_transform = transforms.Compose([transforms.ToTensor(),\n                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n    # load image\n    img_path = \"both.png\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path).convert('RGB')\n    img = np.array(img, dtype=np.uint8)\n    img = center_crop_img(img, 
img_size)\n\n    # [C, H, W]\n    img_tensor = data_transform(img)\n    # expand batch dimension\n    # [C, H, W] -> [N, C, H, W]\n    input_tensor = torch.unsqueeze(img_tensor, dim=0)\n\n    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=False,\n                  reshape_transform=ResizeTransform(im_h=img_size, im_w=img_size))\n    target_category = 281  # tabby, tabby cat\n    # target_category = 254  # pug, pug-dog\n\n    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)\n\n    grayscale_cam = grayscale_cam[0, :]\n    visualization = show_cam_on_image(img / 255., grayscale_cam, use_rgb=True)\n    plt.imshow(visualization)\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/grad_cam/main_vit.py",
    "content": "import os\nimport numpy as np\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nfrom torchvision import transforms\nfrom utils import GradCAM, show_cam_on_image, center_crop_img\nfrom vit_model import vit_base_patch16_224\n\n\nclass ReshapeTransform:\n    def __init__(self, model):\n        input_size = model.patch_embed.img_size\n        patch_size = model.patch_embed.patch_size\n        self.h = input_size[0] // patch_size[0]\n        self.w = input_size[1] // patch_size[1]\n\n    def __call__(self, x):\n        # remove cls token and reshape\n        # [batch_size, num_tokens, token_dim]\n        result = x[:, 1:, :].reshape(x.size(0),\n                                     self.h,\n                                     self.w,\n                                     x.size(2))\n\n        # Bring the channels to the first dimension,\n        # like in CNNs.\n        # [batch_size, H, W, C] -> [batch, C, H, W]\n        result = result.permute(0, 3, 1, 2)\n        return result\n\n\ndef main():\n    model = vit_base_patch16_224()\n    # 链接: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  密码: eu9f\n    weights_path = \"./vit_base_patch16_224.pth\"\n    model.load_state_dict(torch.load(weights_path, map_location=\"cpu\"))\n    # Since the final classification is done on the class token computed in the last attention block,\n    # the output will not be affected by the 14x14 channels in the last layer.\n    # The gradient of the output with respect to them, will be 0!\n    # We should chose any layer before the final attention block.\n    target_layers = [model.blocks[-1].norm1]\n\n    data_transform = transforms.Compose([transforms.ToTensor(),\n                                         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])\n    # load image\n    img_path = \"both.png\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path).convert('RGB')\n    img = 
np.array(img, dtype=np.uint8)\n    img = center_crop_img(img, 224)\n    # [C, H, W]\n    img_tensor = data_transform(img)\n    # expand batch dimension\n    # [C, H, W] -> [N, C, H, W]\n    input_tensor = torch.unsqueeze(img_tensor, dim=0)\n\n    cam = GradCAM(model=model,\n                  target_layers=target_layers,\n                  use_cuda=False,\n                  reshape_transform=ReshapeTransform(model))\n    target_category = 281  # tabby, tabby cat\n    # target_category = 254  # pug, pug-dog\n\n    grayscale_cam = cam(input_tensor=input_tensor, target_category=target_category)\n\n    grayscale_cam = grayscale_cam[0, :]\n    visualization = show_cam_on_image(img / 255., grayscale_cam, use_rgb=True)\n    plt.imshow(visualization)\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/grad_cam/swin_model.py",
    "content": "\"\"\" Swin Transformer\nA PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`\n    - https://arxiv.org/pdf/2103.14030\n\nCode/weights from https://github.com/microsoft/Swin-Transformer\n\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as checkpoint\nimport numpy as np\nfrom typing import Optional\n\n\ndef drop_path_f(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n\n    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,\n    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...\n    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for\n    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use\n    'survival rate' as the argument.\n\n    \"\"\"\n    if drop_prob == 0. 
or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path_f(x, self.drop_prob, self.training)\n\n\ndef window_partition(x, window_size: int):\n    \"\"\"\n    将feature map按照window_size划分成一个个没有重叠的window\n    Args:\n        x: (B, H, W, C)\n        window_size (int): window size(M)\n\n    Returns:\n        windows: (num_windows*B, window_size, window_size, C)\n    \"\"\"\n    B, H, W, C = x.shape\n    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)\n    # permute: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H//Mh, W//Mh, Mw, Mw, C]\n    # view: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B*num_windows, Mh, Mw, C]\n    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)\n    return windows\n\n\ndef window_reverse(windows, window_size: int, H: int, W: int):\n    \"\"\"\n    将一个个window还原成一个feature map\n    Args:\n        windows: (num_windows*B, window_size, window_size, C)\n        window_size (int): Window size(M)\n        H (int): Height of image\n        W (int): Width of image\n\n    Returns:\n        x: (B, H, W, C)\n    \"\"\"\n    B = int(windows.shape[0] / (H * W / window_size / window_size))\n    # view: [B*num_windows, Mh, Mw, C] -> [B, H//Mh, W//Mw, Mh, Mw, C]\n    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)\n    # permute: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B, 
H//Mh, Mh, W//Mw, Mw, C]\n    # view: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H, W, C]\n    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)\n    return x\n\n\nclass PatchEmbed(nn.Module):\n    \"\"\"\n    2D Image to Patch Embedding\n    \"\"\"\n    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):\n        super().__init__()\n        patch_size = (patch_size, patch_size)\n        self.patch_size = patch_size\n        self.in_chans = in_c\n        self.embed_dim = embed_dim\n        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)\n        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()\n\n    def forward(self, x):\n        _, _, H, W = x.shape\n\n        # padding\n        # 如果输入图片的H，W不是patch_size的整数倍，需要进行padding\n        pad_input = (H % self.patch_size[0] != 0) or (W % self.patch_size[1] != 0)\n        if pad_input:\n            # to pad the last 3 dimensions,\n            # (W_left, W_right, H_top,H_bottom, C_front, C_back)\n            x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1],\n                          0, self.patch_size[0] - H % self.patch_size[0],\n                          0, 0))\n\n        # 下采样patch_size倍\n        x = self.proj(x)\n        _, _, H, W = x.shape\n        # flatten: [B, C, H, W] -> [B, C, HW]\n        # transpose: [B, C, HW] -> [B, HW, C]\n        x = x.flatten(2).transpose(1, 2)\n        x = self.norm(x)\n        return x, H, W\n\n\nclass PatchMerging(nn.Module):\n    r\"\"\" Patch Merging Layer.\n\n    Args:\n        dim (int): Number of input channels.\n        norm_layer (nn.Module, optional): Normalization layer.  
Default: nn.LayerNorm\n    \"\"\"\n\n    def __init__(self, dim, norm_layer=nn.LayerNorm):\n        super().__init__()\n        self.dim = dim\n        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)\n        self.norm = norm_layer(4 * dim)\n\n    def forward(self, x, H, W):\n        \"\"\"\n        x: B, H*W, C\n        \"\"\"\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        x = x.view(B, H, W, C)\n\n        # padding\n        # 如果输入feature map的H，W不是2的整数倍，需要进行padding\n        pad_input = (H % 2 == 1) or (W % 2 == 1)\n        if pad_input:\n            # to pad the last 3 dimensions, starting from the last dimension and moving forward.\n            # (C_front, C_back, W_left, W_right, H_top, H_bottom)\n            # 注意这里的Tensor通道是[B, H, W, C]，所以会和官方文档有些不同\n            x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2))\n\n        x0 = x[:, 0::2, 0::2, :]  # [B, H/2, W/2, C]\n        x1 = x[:, 1::2, 0::2, :]  # [B, H/2, W/2, C]\n        x2 = x[:, 0::2, 1::2, :]  # [B, H/2, W/2, C]\n        x3 = x[:, 1::2, 1::2, :]  # [B, H/2, W/2, C]\n        x = torch.cat([x0, x1, x2, x3], -1)  # [B, H/2, W/2, 4*C]\n        x = x.view(B, -1, 4 * C)  # [B, H/2*W/2, 4*C]\n\n        x = self.norm(x)\n        x = self.reduction(x)  # [B, H/2*W/2, 2*C]\n\n        return x\n\n\nclass Mlp(nn.Module):\n    \"\"\" MLP as used in Vision Transformer, MLP-Mixer and related networks\n    \"\"\"\n    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n\n        self.fc1 = nn.Linear(in_features, hidden_features)\n        self.act = act_layer()\n        self.drop1 = nn.Dropout(drop)\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop2 = nn.Dropout(drop)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n      
  x = self.drop1(x)\n        x = self.fc2(x)\n        x = self.drop2(x)\n        return x\n\n\nclass WindowAttention(nn.Module):\n    r\"\"\" Window based multi-head self attention (W-MSA) module with relative position bias.\n    It supports both of shifted and non-shifted window.\n\n    Args:\n        dim (int): Number of input channels.\n        window_size (tuple[int]): The height and width of the window.\n        num_heads (int): Number of attention heads.\n        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True\n        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0\n        proj_drop (float, optional): Dropout ratio of output. Default: 0.0\n    \"\"\"\n\n    def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.):\n\n        super().__init__()\n        self.dim = dim\n        self.window_size = window_size  # [Mh, Mw]\n        self.num_heads = num_heads\n        head_dim = dim // num_heads\n        self.scale = head_dim ** -0.5\n\n        # define a parameter table of relative position bias\n        self.relative_position_bias_table = nn.Parameter(\n            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # [2*Mh-1 * 2*Mw-1, nH]\n\n        # get pair-wise relative position index for each token inside the window\n        coords_h = torch.arange(self.window_size[0])\n        coords_w = torch.arange(self.window_size[1])\n        coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing=\"ij\"))  # [2, Mh, Mw]\n        coords_flatten = torch.flatten(coords, 1)  # [2, Mh*Mw]\n        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw]\n        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # [2, Mh*Mw, Mh*Mw]\n        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # [Mh*Mw, Mh*Mw, 2]\n        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0\n        
relative_coords[:, :, 1] += self.window_size[1] - 1\n        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1\n        relative_position_index = relative_coords.sum(-1)  # [Mh*Mw, Mh*Mw]\n        self.register_buffer(\"relative_position_index\", relative_position_index)\n\n        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop)\n        self.proj = nn.Linear(dim, dim)\n        self.proj_drop = nn.Dropout(proj_drop)\n\n        nn.init.trunc_normal_(self.relative_position_bias_table, std=.02)\n        self.softmax = nn.Softmax(dim=-1)\n\n    def forward(self, x, mask: Optional[torch.Tensor] = None):\n        \"\"\"\n        Args:\n            x: input features with shape of (num_windows*B, Mh*Mw, C)\n            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None\n        \"\"\"\n        # [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        B_, N, C = x.shape\n        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]\n        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]\n        # permute: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)\n        # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)\n\n        # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw]\n        # @: multiply -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw]\n        q = q * self.scale\n        attn = (q @ k.transpose(-2, -1))\n\n        # relative_position_bias_table.view: [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH]\n        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(\n            self.window_size[0] * self.window_size[1], self.window_size[0] * 
self.window_size[1], -1)\n        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # [nH, Mh*Mw, Mh*Mw]\n        attn = attn + relative_position_bias.unsqueeze(0)\n\n        if mask is not None:\n            # mask: [nW, Mh*Mw, Mh*Mw]\n            nW = mask.shape[0]  # num_windows\n            # attn.view: [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]\n            # mask.unsqueeze: [1, nW, 1, Mh*Mw, Mh*Mw]\n            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)\n            attn = attn.view(-1, self.num_heads, N, N)\n            attn = self.softmax(attn)\n        else:\n            attn = self.softmax(attn)\n\n        attn = self.attn_drop(attn)\n\n        # @: multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]\n        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n\nclass SwinTransformerBlock(nn.Module):\n    r\"\"\" Swin Transformer Block.\n\n    Args:\n        dim (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n        window_size (int): Window size.\n        shift_size (int): Shift size for SW-MSA.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float, optional): Stochastic depth rate. Default: 0.0\n        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU\n        norm_layer (nn.Module, optional): Normalization layer.  
Default: nn.LayerNorm\n    \"\"\"\n\n    def __init__(self, dim, num_heads, window_size=7, shift_size=0,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,\n                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):\n        super().__init__()\n        self.dim = dim\n        self.num_heads = num_heads\n        self.window_size = window_size\n        self.shift_size = shift_size\n        self.mlp_ratio = mlp_ratio\n        assert 0 <= self.shift_size < self.window_size, \"shift_size must in 0-window_size\"\n\n        self.norm1 = norm_layer(dim)\n        self.attn = WindowAttention(\n            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads, qkv_bias=qkv_bias,\n            attn_drop=attn_drop, proj_drop=drop)\n\n        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()\n        self.norm2 = norm_layer(dim)\n        mlp_hidden_dim = int(dim * mlp_ratio)\n        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)\n\n    def forward(self, x, attn_mask):\n        H, W = self.H, self.W\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        shortcut = x\n        x = self.norm1(x)\n        x = x.view(B, H, W, C)\n\n        # pad feature maps to multiples of window size\n        # 把feature map给pad到window size的整数倍\n        pad_l = pad_t = 0\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\n        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))\n        _, Hp, Wp, _ = x.shape\n\n        # cyclic shift\n        if self.shift_size > 0:\n            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))\n        else:\n            shifted_x = x\n            attn_mask = None\n\n        # partition windows\n        x_windows = window_partition(shifted_x, 
self.window_size)  # [nW*B, Mh, Mw, C]\n        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # [nW*B, Mh*Mw, C]\n\n        # W-MSA/SW-MSA\n        attn_windows = self.attn(x_windows, mask=attn_mask)  # [nW*B, Mh*Mw, C]\n\n        # merge windows\n        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)  # [nW*B, Mh, Mw, C]\n        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]\n\n        # reverse cyclic shift\n        if self.shift_size > 0:\n            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))\n        else:\n            x = shifted_x\n\n        if pad_r > 0 or pad_b > 0:\n            # 把前面pad的数据移除掉\n            x = x[:, :H, :W, :].contiguous()\n\n        x = x.view(B, H * W, C)\n\n        # FFN\n        x = shortcut + self.drop_path(x)\n        x = x + self.drop_path(self.mlp(self.norm2(x)))\n\n        return x\n\n\nclass BasicLayer(nn.Module):\n    \"\"\"\n    A basic Swin Transformer layer for one stage.\n\n    Args:\n        dim (int): Number of input channels.\n        depth (int): Number of blocks.\n        num_heads (int): Number of attention heads.\n        window_size (int): Local window size.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0\n        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm\n        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None\n        use_checkpoint (bool): Whether to use checkpointing to save memory. 
Default: False.\n    \"\"\"\n\n    def __init__(self, dim, depth, num_heads, window_size,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,\n                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):\n        super().__init__()\n        self.dim = dim\n        self.depth = depth\n        self.window_size = window_size\n        self.use_checkpoint = use_checkpoint\n        self.shift_size = window_size // 2\n\n        # build blocks\n        self.blocks = nn.ModuleList([\n            SwinTransformerBlock(\n                dim=dim,\n                num_heads=num_heads,\n                window_size=window_size,\n                shift_size=0 if (i % 2 == 0) else self.shift_size,\n                mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias,\n                drop=drop,\n                attn_drop=attn_drop,\n                drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,\n                norm_layer=norm_layer)\n            for i in range(depth)])\n\n        # patch merging layer\n        if downsample is not None:\n            self.downsample = downsample(dim=dim, norm_layer=norm_layer)\n        else:\n            self.downsample = None\n\n    def create_mask(self, x, H, W):\n        # calculate attention mask for SW-MSA\n        # 保证Hp和Wp是window_size的整数倍\n        Hp = int(np.ceil(H / self.window_size)) * self.window_size\n        Wp = int(np.ceil(W / self.window_size)) * self.window_size\n        # 拥有和feature map一样的通道排列顺序，方便后续window_partition\n        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # [1, Hp, Wp, 1]\n        h_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n        w_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n        cnt = 0\n        for 
h in h_slices:\n            for w in w_slices:\n                img_mask[:, h, w, :] = cnt\n                cnt += 1\n\n        mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]\n        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)  # [nW, Mh*Mw]\n        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)  # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]\n        # [nW, Mh*Mw, Mh*Mw]\n        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))\n        return attn_mask\n\n    def forward(self, x, H, W):\n        attn_mask = self.create_mask(x, H, W)  # [nW, Mh*Mw, Mh*Mw]\n        for blk in self.blocks:\n            blk.H, blk.W = H, W\n            if not torch.jit.is_scripting() and self.use_checkpoint:\n                x = checkpoint.checkpoint(blk, x, attn_mask)\n            else:\n                x = blk(x, attn_mask)\n        if self.downsample is not None:\n            x = self.downsample(x, H, W)\n            H, W = (H + 1) // 2, (W + 1) // 2\n\n        return x, H, W\n\n\nclass SwinTransformer(nn.Module):\n    r\"\"\" Swin Transformer\n        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -\n          https://arxiv.org/pdf/2103.14030\n\n    Args:\n        patch_size (int | tuple(int)): Patch size. Default: 4\n        in_chans (int): Number of input image channels. Default: 3\n        num_classes (int): Number of classes for classification head. Default: 1000\n        embed_dim (int): Patch embedding dimension. Default: 96\n        depths (tuple(int)): Depth of each Swin Transformer layer.\n        num_heads (tuple(int)): Number of attention heads in different layers.\n        window_size (int): Window size. Default: 7\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4\n        qkv_bias (bool): If True, add a learnable bias to query, key, value. 
Default: True\n        drop_rate (float): Dropout rate. Default: 0\n        attn_drop_rate (float): Attention dropout rate. Default: 0\n        drop_path_rate (float): Stochastic depth rate. Default: 0.1\n        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.\n        patch_norm (bool): If True, add normalization after patch embedding. Default: True\n        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False\n    \"\"\"\n\n    def __init__(self, patch_size=4, in_chans=3, num_classes=1000,\n                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),\n                 window_size=7, mlp_ratio=4., qkv_bias=True,\n                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,\n                 norm_layer=nn.LayerNorm, patch_norm=True,\n                 use_checkpoint=False, **kwargs):\n        super().__init__()\n\n        self.num_classes = num_classes\n        self.num_layers = len(depths)\n        self.embed_dim = embed_dim\n        self.patch_norm = patch_norm\n        # stage4输出特征矩阵的channels\n        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))\n        self.mlp_ratio = mlp_ratio\n\n        # split image into non-overlapping patches\n        self.patch_embed = PatchEmbed(\n            patch_size=patch_size, in_c=in_chans, embed_dim=embed_dim,\n            norm_layer=norm_layer if self.patch_norm else None)\n        self.pos_drop = nn.Dropout(p=drop_rate)\n\n        # stochastic depth\n        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule\n\n        # build layers\n        self.layers = nn.ModuleList()\n        for i_layer in range(self.num_layers):\n            # 注意这里构建的stage和论文图中有些差异\n            # 这里的stage不包含该stage的patch_merging层，包含的是下个stage的\n            layers = BasicLayer(dim=int(embed_dim * 2 ** i_layer),\n                                depth=depths[i_layer],\n                                
num_heads=num_heads[i_layer],\n                                window_size=window_size,\n                                mlp_ratio=self.mlp_ratio,\n                                qkv_bias=qkv_bias,\n                                drop=drop_rate,\n                                attn_drop=attn_drop_rate,\n                                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],\n                                norm_layer=norm_layer,\n                                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,\n                                use_checkpoint=use_checkpoint)\n            self.layers.append(layers)\n\n        self.norm = norm_layer(self.num_features)\n        self.avgpool = nn.AdaptiveAvgPool1d(1)\n        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()\n\n        self.apply(self._init_weights)\n\n    def _init_weights(self, m):\n        if isinstance(m, nn.Linear):\n            nn.init.trunc_normal_(m.weight, std=.02)\n            if isinstance(m, nn.Linear) and m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n        elif isinstance(m, nn.LayerNorm):\n            nn.init.constant_(m.bias, 0)\n            nn.init.constant_(m.weight, 1.0)\n\n    def forward(self, x):\n        # x: [B, L, C]\n        x, H, W = self.patch_embed(x)\n        x = self.pos_drop(x)\n\n        for layer in self.layers:\n            x, H, W = layer(x, H, W)\n\n        x = self.norm(x)  # [B, L, C]\n        x = self.avgpool(x.transpose(1, 2))  # [B, C, 1]\n        x = torch.flatten(x, 1)\n        x = self.head(x)\n        return x\n\n\ndef swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n         
                   embed_dim=96,\n                            depths=(2, 2, 6, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=96,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n         
                   **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    
# https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n"
  },
  {
    "path": "pytorch_classification/grad_cam/utils.py",
    "content": "import cv2\nimport numpy as np\n\n\nclass ActivationsAndGradients:\n    \"\"\" Class for extracting activations and\n    registering gradients from targeted intermediate layers \"\"\"\n\n    def __init__(self, model, target_layers, reshape_transform):\n        self.model = model\n        self.gradients = []\n        self.activations = []\n        self.reshape_transform = reshape_transform\n        self.handles = []\n        for target_layer in target_layers:\n            self.handles.append(\n                target_layer.register_forward_hook(\n                    self.save_activation))\n            # Backward compatibility with older pytorch versions:\n            if hasattr(target_layer, 'register_full_backward_hook'):\n                self.handles.append(\n                    target_layer.register_full_backward_hook(\n                        self.save_gradient))\n            else:\n                self.handles.append(\n                    target_layer.register_backward_hook(\n                        self.save_gradient))\n\n    def save_activation(self, module, input, output):\n        activation = output\n        if self.reshape_transform is not None:\n            activation = self.reshape_transform(activation)\n        self.activations.append(activation.cpu().detach())\n\n    def save_gradient(self, module, grad_input, grad_output):\n        # Gradients are computed in reverse order\n        grad = grad_output[0]\n        if self.reshape_transform is not None:\n            grad = self.reshape_transform(grad)\n        self.gradients = [grad.cpu().detach()] + self.gradients\n\n    def __call__(self, x):\n        self.gradients = []\n        self.activations = []\n        return self.model(x)\n\n    def release(self):\n        for handle in self.handles:\n            handle.remove()\n\n\nclass GradCAM:\n    def __init__(self,\n                 model,\n                 target_layers,\n                 reshape_transform=None,\n                 
use_cuda=False):\n        self.model = model.eval()\n        self.target_layers = target_layers\n        self.reshape_transform = reshape_transform\n        self.cuda = use_cuda\n        if self.cuda:\n            self.model = model.cuda()\n        self.activations_and_grads = ActivationsAndGradients(\n            self.model, target_layers, reshape_transform)\n\n    \"\"\" Get a vector of weights for every channel in the target layer.\n        Methods that return weights channels,\n        will typically need to only implement this function. \"\"\"\n\n    @staticmethod\n    def get_cam_weights(grads):\n        return np.mean(grads, axis=(2, 3), keepdims=True)\n\n    @staticmethod\n    def get_loss(output, target_category):\n        loss = 0\n        for i in range(len(target_category)):\n            loss = loss + output[i, target_category[i]]\n        return loss\n\n    def get_cam_image(self, activations, grads):\n        weights = self.get_cam_weights(grads)\n        weighted_activations = weights * activations\n        cam = weighted_activations.sum(axis=1)\n\n        return cam\n\n    @staticmethod\n    def get_target_width_height(input_tensor):\n        width, height = input_tensor.size(-1), input_tensor.size(-2)\n        return width, height\n\n    def compute_cam_per_layer(self, input_tensor):\n        activations_list = [a.cpu().data.numpy()\n                            for a in self.activations_and_grads.activations]\n        grads_list = [g.cpu().data.numpy()\n                      for g in self.activations_and_grads.gradients]\n        target_size = self.get_target_width_height(input_tensor)\n\n        cam_per_target_layer = []\n        # Loop over the saliency image from every layer\n\n        for layer_activations, layer_grads in zip(activations_list, grads_list):\n            cam = self.get_cam_image(layer_activations, layer_grads)\n            cam[cam < 0] = 0  # clamp negatives to zero so the min-subtraction in scale_cam_image is (mostly) a no-op\n            scaled 
= self.scale_cam_image(cam, target_size)\n            cam_per_target_layer.append(scaled[:, None, :])\n\n        return cam_per_target_layer\n\n    def aggregate_multi_layers(self, cam_per_target_layer):\n        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)\n        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)\n        result = np.mean(cam_per_target_layer, axis=1)\n        return self.scale_cam_image(result)\n\n    @staticmethod\n    def scale_cam_image(cam, target_size=None):\n        result = []\n        for img in cam:\n            img = img - np.min(img)\n            img = img / (1e-7 + np.max(img))\n            if target_size is not None:\n                img = cv2.resize(img, target_size)\n            result.append(img)\n        result = np.float32(result)\n\n        return result\n\n    def __call__(self, input_tensor, target_category=None):\n\n        if self.cuda:\n            input_tensor = input_tensor.cuda()\n\n        # 正向传播得到网络输出logits(未经过softmax)\n        output = self.activations_and_grads(input_tensor)\n        if isinstance(target_category, int):\n            target_category = [target_category] * input_tensor.size(0)\n\n        if target_category is None:\n            target_category = np.argmax(output.cpu().data.numpy(), axis=-1)\n            print(f\"category id: {target_category}\")\n        else:\n            assert (len(target_category) == input_tensor.size(0))\n\n        self.model.zero_grad()\n        loss = self.get_loss(output, target_category)\n        loss.backward(retain_graph=True)\n\n        # In most of the saliency attribution papers, the saliency is\n        # computed with a single target layer.\n        # Commonly it is the last convolutional layer.\n        # Here we support passing a list with multiple target layers.\n        # It will compute the saliency image for every image,\n        # and then aggregate them (with a default mean aggregation).\n        # This gives you more flexibility 
in case you just want to\n        # use all conv layers for example, all Batchnorm layers,\n        # or something else.\n        cam_per_layer = self.compute_cam_per_layer(input_tensor)\n        return self.aggregate_multi_layers(cam_per_layer)\n\n    def __del__(self):\n        self.activations_and_grads.release()\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_value, exc_tb):\n        self.activations_and_grads.release()\n        if isinstance(exc_value, IndexError):\n            # Handle IndexError here...\n            print(\n                f\"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}\")\n            return True\n\n\ndef show_cam_on_image(img: np.ndarray,\n                      mask: np.ndarray,\n                      use_rgb: bool = False,\n                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:\n    \"\"\" This function overlays the cam mask on the image as a heatmap.\n    By default the heatmap is in BGR format.\n\n    :param img: The base image in RGB or BGR format.\n    :param mask: The cam mask.\n    :param use_rgb: Whether to use an RGB or BGR heatmap; this should be set to True if 'img' is in RGB format.\n    :param colormap: The OpenCV colormap to be used.\n    :returns: The default image with the cam overlay.\n    \"\"\"\n\n    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)\n    if use_rgb:\n        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)\n    heatmap = np.float32(heatmap) / 255\n\n    if np.max(img) > 1:\n        raise Exception(\n            \"The input image should np.float32 in the range [0, 1]\")\n\n    cam = heatmap + img\n    cam = cam / np.max(cam)\n    return np.uint8(255 * cam)\n\n\ndef center_crop_img(img: np.ndarray, size: int):\n    h, w, c = img.shape\n\n    if w == h == size:\n        return img\n\n    if w < h:\n        ratio = size / w\n        new_w = size\n        new_h = int(h * ratio)\n    else:\n        ratio = size 
/ h\n        new_h = size\n        new_w = int(w * ratio)\n\n    img = cv2.resize(img, dsize=(new_w, new_h))\n\n    if new_w == size:\n        h = (new_h - size) // 2\n        img = img[h: h+size]\n    else:\n        w = (new_w - size) // 2\n        img = img[:, w: w+size]\n\n    return img\n"
  },
  {
    "path": "pytorch_classification/grad_cam/vit_model.py",
    "content": "\"\"\"\noriginal code from rwightman:\nhttps://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py\n\"\"\"\nfrom functools import partial\nfrom collections import OrderedDict\n\nimport torch\nimport torch.nn as nn\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,\n    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...\n    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for\n    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use\n    'survival rate' as the argument.\n    \"\"\"\n    if drop_prob == 0. or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass PatchEmbed(nn.Module):\n    \"\"\"\n    2D Image to Patch Embedding\n    \"\"\"\n    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):\n        super().__init__()\n        img_size = (img_size, img_size)\n        patch_size = (patch_size, patch_size)\n        self.img_size = img_size\n        
self.patch_size = patch_size\n        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])\n        self.num_patches = self.grid_size[0] * self.grid_size[1]\n\n        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)\n        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()\n\n    def forward(self, x):\n        B, C, H, W = x.shape\n        assert H == self.img_size[0] and W == self.img_size[1], \\\n            f\"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]}).\"\n\n        # flatten: [B, C, H, W] -> [B, C, HW]\n        # transpose: [B, C, HW] -> [B, HW, C]\n        x = self.proj(x).flatten(2).transpose(1, 2)\n        x = self.norm(x)\n        return x\n\n\nclass Attention(nn.Module):\n    def __init__(self,\n                 dim,   # 输入token的dim\n                 num_heads=8,\n                 qkv_bias=False,\n                 qk_scale=None,\n                 attn_drop_ratio=0.,\n                 proj_drop_ratio=0.):\n        super(Attention, self).__init__()\n        self.num_heads = num_heads\n        head_dim = dim // num_heads\n        self.scale = qk_scale or head_dim ** -0.5\n        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop_ratio)\n        self.proj = nn.Linear(dim, dim)\n        self.proj_drop = nn.Dropout(proj_drop_ratio)\n\n    def forward(self, x):\n        # [batch_size, num_patches + 1, total_embed_dim]\n        B, N, C = x.shape\n\n        # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]\n        # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]\n        # permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)\n        # [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        q, k, v = qkv[0], qkv[1], 
qkv[2]  # make torchscript happy (cannot use tensor as tuple)\n\n        # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]\n        # @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]\n        attn = (q @ k.transpose(-2, -1)) * self.scale\n        attn = attn.softmax(dim=-1)\n        attn = self.attn_drop(attn)\n\n        # @: multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]\n        # reshape: -> [batch_size, num_patches + 1, total_embed_dim]\n        x = (attn @ v).transpose(1, 2).reshape(B, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n\nclass Mlp(nn.Module):\n    \"\"\"\n    MLP as used in Vision Transformer, MLP-Mixer and related networks\n    \"\"\"\n    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n        self.fc1 = nn.Linear(in_features, hidden_features)\n        self.act = act_layer()\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop = nn.Dropout(drop)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop(x)\n        x = self.fc2(x)\n        x = self.drop(x)\n        return x\n\n\nclass Block(nn.Module):\n    def __init__(self,\n                 dim,\n                 num_heads,\n                 mlp_ratio=4.,\n                 qkv_bias=False,\n                 qk_scale=None,\n                 drop_ratio=0.,\n                 attn_drop_ratio=0.,\n                 drop_path_ratio=0.,\n                 act_layer=nn.GELU,\n                 norm_layer=nn.LayerNorm):\n        super(Block, self).__init__()\n        self.norm1 = norm_layer(dim)\n        self.attn = Attention(dim, num_heads=num_heads, 
qkv_bias=qkv_bias, qk_scale=qk_scale,\n                              attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio)\n        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here\n        self.drop_path = DropPath(drop_path_ratio) if drop_path_ratio > 0. else nn.Identity()\n        self.norm2 = norm_layer(dim)\n        mlp_hidden_dim = int(dim * mlp_ratio)\n        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop_ratio)\n\n    def forward(self, x):\n        x = x + self.drop_path(self.attn(self.norm1(x)))\n        x = x + self.drop_path(self.mlp(self.norm2(x)))\n        return x\n\n\nclass VisionTransformer(nn.Module):\n    def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,\n                 embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,\n                 qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,\n                 attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,\n                 act_layer=None):\n        \"\"\"\n        Args:\n            img_size (int, tuple): input image size\n            patch_size (int, tuple): patch size\n            in_c (int): number of input channels\n            num_classes (int): number of classes for classification head\n            embed_dim (int): embedding dimension\n            depth (int): depth of transformer\n            num_heads (int): number of attention heads\n            mlp_ratio (int): ratio of mlp hidden dim to embedding dim\n            qkv_bias (bool): enable bias for qkv if True\n            qk_scale (float): override default qk scale of head_dim ** -0.5 if set\n            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set\n            distilled (bool): model includes a distillation token and head as in DeiT models\n            drop_ratio (float): dropout rate\n   
         attn_drop_ratio (float): attention dropout rate\n            drop_path_ratio (float): stochastic depth rate\n            embed_layer (nn.Module): patch embedding layer\n            norm_layer: (nn.Module): normalization layer\n        \"\"\"\n        super(VisionTransformer, self).__init__()\n        self.num_classes = num_classes\n        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models\n        self.num_tokens = 2 if distilled else 1\n        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)\n        act_layer = act_layer or nn.GELU\n\n        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)\n        num_patches = self.patch_embed.num_patches\n\n        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))\n        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None\n        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))\n        self.pos_drop = nn.Dropout(p=drop_ratio)\n\n        dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)]  # stochastic depth decay rule\n        self.blocks = nn.Sequential(*[\n            Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,\n                  drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i],\n                  norm_layer=norm_layer, act_layer=act_layer)\n            for i in range(depth)\n        ])\n        self.norm = norm_layer(embed_dim)\n\n        # Representation layer\n        if representation_size and not distilled:\n            self.has_logits = True\n            self.num_features = representation_size\n            self.pre_logits = nn.Sequential(OrderedDict([\n                (\"fc\", nn.Linear(embed_dim, representation_size)),\n                (\"act\", nn.Tanh())\n            ]))\n        else:\n            
self.has_logits = False\n            self.pre_logits = nn.Identity()\n\n        # Classifier head(s)\n        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()\n        self.head_dist = None\n        if distilled:\n            self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()\n\n        # Weight init\n        nn.init.trunc_normal_(self.pos_embed, std=0.02)\n        if self.dist_token is not None:\n            nn.init.trunc_normal_(self.dist_token, std=0.02)\n\n        nn.init.trunc_normal_(self.cls_token, std=0.02)\n        self.apply(_init_vit_weights)\n\n    def forward_features(self, x):\n        # [B, C, H, W] -> [B, num_patches, embed_dim]\n        x = self.patch_embed(x)  # [B, 196, 768]\n        # [1, 1, 768] -> [B, 1, 768]\n        cls_token = self.cls_token.expand(x.shape[0], -1, -1)\n        if self.dist_token is None:\n            x = torch.cat((cls_token, x), dim=1)  # [B, 197, 768]\n        else:\n            x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)\n\n        x = self.pos_drop(x + self.pos_embed)\n        x = self.blocks(x)\n        x = self.norm(x)\n        if self.dist_token is None:\n            return self.pre_logits(x[:, 0])\n        else:\n            return x[:, 0], x[:, 1]\n\n    def forward(self, x):\n        x = self.forward_features(x)\n        if self.head_dist is not None:\n            x, x_dist = self.head(x[0]), self.head_dist(x[1])\n            if self.training and not torch.jit.is_scripting():\n                # during inference, return the average of both classifier predictions\n                return x, x_dist\n            else:\n                return (x + x_dist) / 2\n        else:\n            x = self.head(x)\n        return x\n\n\ndef _init_vit_weights(m):\n    \"\"\"\n    ViT weight initialization\n    :param m: module\n    \"\"\"\n    if isinstance(m, nn.Linear):\n        
nn.init.trunc_normal_(m.weight, std=.01)\n        if m.bias is not None:\n            nn.init.zeros_(m.bias)\n    elif isinstance(m, nn.Conv2d):\n        nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n        if m.bias is not None:\n            nn.init.zeros_(m.bias)\n    elif isinstance(m, nn.LayerNorm):\n        nn.init.zeros_(m.bias)\n        nn.init.ones_(m.weight)\n\n\ndef vit_base_patch16_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  密码: eu9f\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=768 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef 
vit_base_patch32_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1hCv0U8pQomwAtHBYc4hmZg  密码: s5hl\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=768 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch16_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  密码: qqt8\n    \"\"\"\n    model = 
VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=1024 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=1024 if has_logits else None,\n               
               num_classes=num_classes)\n    return model\n\n\ndef vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    NOTE: converted weights not currently available, too large for github release hosting.\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=14,\n                              embed_dim=1280,\n                              depth=32,\n                              num_heads=16,\n                              representation_size=1280 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/README.md",
    "content": "## download mini-imagenet\nlink: [https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ](https://pan.baidu.com/s/1Uro6RuEbRGGCQ8iXvF2SAQ)  password: hl31\n\n## dataset path structure\n```\n├── mini-imagenet: total 100 classes, 60000 images\n     ├── images: 60000 images\n     ├── train.csv: 64 classes, 38400 images\n     ├── val.csv: 16 classes, 9600 images\n     └── test.csv: 20 classes, 12000 images\n```"
  },
  {
    "path": "pytorch_classification/mini_imagenet/imagenet_class_index.json",
    "content": "{\"0\": [\"n01440764\", \"tench\"], \"1\": [\"n01443537\", \"goldfish\"], \"2\": [\"n01484850\", \"great_white_shark\"], \"3\": [\"n01491361\", \"tiger_shark\"], \"4\": [\"n01494475\", \"hammerhead\"], \"5\": [\"n01496331\", \"electric_ray\"], \"6\": [\"n01498041\", \"stingray\"], \"7\": [\"n01514668\", \"cock\"], \"8\": [\"n01514859\", \"hen\"], \"9\": [\"n01518878\", \"ostrich\"], \"10\": [\"n01530575\", \"brambling\"], \"11\": [\"n01531178\", \"goldfinch\"], \"12\": [\"n01532829\", \"house_finch\"], \"13\": [\"n01534433\", \"junco\"], \"14\": [\"n01537544\", \"indigo_bunting\"], \"15\": [\"n01558993\", \"robin\"], \"16\": [\"n01560419\", \"bulbul\"], \"17\": [\"n01580077\", \"jay\"], \"18\": [\"n01582220\", \"magpie\"], \"19\": [\"n01592084\", \"chickadee\"], \"20\": [\"n01601694\", \"water_ouzel\"], \"21\": [\"n01608432\", \"kite\"], \"22\": [\"n01614925\", \"bald_eagle\"], \"23\": [\"n01616318\", \"vulture\"], \"24\": [\"n01622779\", \"great_grey_owl\"], \"25\": [\"n01629819\", \"European_fire_salamander\"], \"26\": [\"n01630670\", \"common_newt\"], \"27\": [\"n01631663\", \"eft\"], \"28\": [\"n01632458\", \"spotted_salamander\"], \"29\": [\"n01632777\", \"axolotl\"], \"30\": [\"n01641577\", \"bullfrog\"], \"31\": [\"n01644373\", \"tree_frog\"], \"32\": [\"n01644900\", \"tailed_frog\"], \"33\": [\"n01664065\", \"loggerhead\"], \"34\": [\"n01665541\", \"leatherback_turtle\"], \"35\": [\"n01667114\", \"mud_turtle\"], \"36\": [\"n01667778\", \"terrapin\"], \"37\": [\"n01669191\", \"box_turtle\"], \"38\": [\"n01675722\", \"banded_gecko\"], \"39\": [\"n01677366\", \"common_iguana\"], \"40\": [\"n01682714\", \"American_chameleon\"], \"41\": [\"n01685808\", \"whiptail\"], \"42\": [\"n01687978\", \"agama\"], \"43\": [\"n01688243\", \"frilled_lizard\"], \"44\": [\"n01689811\", \"alligator_lizard\"], \"45\": [\"n01692333\", \"Gila_monster\"], \"46\": [\"n01693334\", \"green_lizard\"], \"47\": [\"n01694178\", \"African_chameleon\"], \"48\": 
[\"n01695060\", \"Komodo_dragon\"], \"49\": [\"n01697457\", \"African_crocodile\"], \"50\": [\"n01698640\", \"American_alligator\"], \"51\": [\"n01704323\", \"triceratops\"], \"52\": [\"n01728572\", \"thunder_snake\"], \"53\": [\"n01728920\", \"ringneck_snake\"], \"54\": [\"n01729322\", \"hognose_snake\"], \"55\": [\"n01729977\", \"green_snake\"], \"56\": [\"n01734418\", \"king_snake\"], \"57\": [\"n01735189\", \"garter_snake\"], \"58\": [\"n01737021\", \"water_snake\"], \"59\": [\"n01739381\", \"vine_snake\"], \"60\": [\"n01740131\", \"night_snake\"], \"61\": [\"n01742172\", \"boa_constrictor\"], \"62\": [\"n01744401\", \"rock_python\"], \"63\": [\"n01748264\", \"Indian_cobra\"], \"64\": [\"n01749939\", \"green_mamba\"], \"65\": [\"n01751748\", \"sea_snake\"], \"66\": [\"n01753488\", \"horned_viper\"], \"67\": [\"n01755581\", \"diamondback\"], \"68\": [\"n01756291\", \"sidewinder\"], \"69\": [\"n01768244\", \"trilobite\"], \"70\": [\"n01770081\", \"harvestman\"], \"71\": [\"n01770393\", \"scorpion\"], \"72\": [\"n01773157\", \"black_and_gold_garden_spider\"], \"73\": [\"n01773549\", \"barn_spider\"], \"74\": [\"n01773797\", \"garden_spider\"], \"75\": [\"n01774384\", \"black_widow\"], \"76\": [\"n01774750\", \"tarantula\"], \"77\": [\"n01775062\", \"wolf_spider\"], \"78\": [\"n01776313\", \"tick\"], \"79\": [\"n01784675\", \"centipede\"], \"80\": [\"n01795545\", \"black_grouse\"], \"81\": [\"n01796340\", \"ptarmigan\"], \"82\": [\"n01797886\", \"ruffed_grouse\"], \"83\": [\"n01798484\", \"prairie_chicken\"], \"84\": [\"n01806143\", \"peacock\"], \"85\": [\"n01806567\", \"quail\"], \"86\": [\"n01807496\", \"partridge\"], \"87\": [\"n01817953\", \"African_grey\"], \"88\": [\"n01818515\", \"macaw\"], \"89\": [\"n01819313\", \"sulphur-crested_cockatoo\"], \"90\": [\"n01820546\", \"lorikeet\"], \"91\": [\"n01824575\", \"coucal\"], \"92\": [\"n01828970\", \"bee_eater\"], \"93\": [\"n01829413\", \"hornbill\"], \"94\": [\"n01833805\", \"hummingbird\"], \"95\": 
[\"n01843065\", \"jacamar\"], \"96\": [\"n01843383\", \"toucan\"], \"97\": [\"n01847000\", \"drake\"], \"98\": [\"n01855032\", \"red-breasted_merganser\"], \"99\": [\"n01855672\", \"goose\"], \"100\": [\"n01860187\", \"black_swan\"], \"101\": [\"n01871265\", \"tusker\"], \"102\": [\"n01872401\", \"echidna\"], \"103\": [\"n01873310\", \"platypus\"], \"104\": [\"n01877812\", \"wallaby\"], \"105\": [\"n01882714\", \"koala\"], \"106\": [\"n01883070\", \"wombat\"], \"107\": [\"n01910747\", \"jellyfish\"], \"108\": [\"n01914609\", \"sea_anemone\"], \"109\": [\"n01917289\", \"brain_coral\"], \"110\": [\"n01924916\", \"flatworm\"], \"111\": [\"n01930112\", \"nematode\"], \"112\": [\"n01943899\", \"conch\"], \"113\": [\"n01944390\", \"snail\"], \"114\": [\"n01945685\", \"slug\"], \"115\": [\"n01950731\", \"sea_slug\"], \"116\": [\"n01955084\", \"chiton\"], \"117\": [\"n01968897\", \"chambered_nautilus\"], \"118\": [\"n01978287\", \"Dungeness_crab\"], \"119\": [\"n01978455\", \"rock_crab\"], \"120\": [\"n01980166\", \"fiddler_crab\"], \"121\": [\"n01981276\", \"king_crab\"], \"122\": [\"n01983481\", \"American_lobster\"], \"123\": [\"n01984695\", \"spiny_lobster\"], \"124\": [\"n01985128\", \"crayfish\"], \"125\": [\"n01986214\", \"hermit_crab\"], \"126\": [\"n01990800\", \"isopod\"], \"127\": [\"n02002556\", \"white_stork\"], \"128\": [\"n02002724\", \"black_stork\"], \"129\": [\"n02006656\", \"spoonbill\"], \"130\": [\"n02007558\", \"flamingo\"], \"131\": [\"n02009229\", \"little_blue_heron\"], \"132\": [\"n02009912\", \"American_egret\"], \"133\": [\"n02011460\", \"bittern\"], \"134\": [\"n02012849\", \"crane\"], \"135\": [\"n02013706\", \"limpkin\"], \"136\": [\"n02017213\", \"European_gallinule\"], \"137\": [\"n02018207\", \"American_coot\"], \"138\": [\"n02018795\", \"bustard\"], \"139\": [\"n02025239\", \"ruddy_turnstone\"], \"140\": [\"n02027492\", \"red-backed_sandpiper\"], \"141\": [\"n02028035\", \"redshank\"], \"142\": [\"n02033041\", \"dowitcher\"], \"143\": 
[\"n02037110\", \"oystercatcher\"], \"144\": [\"n02051845\", \"pelican\"], \"145\": [\"n02056570\", \"king_penguin\"], \"146\": [\"n02058221\", \"albatross\"], \"147\": [\"n02066245\", \"grey_whale\"], \"148\": [\"n02071294\", \"killer_whale\"], \"149\": [\"n02074367\", \"dugong\"], \"150\": [\"n02077923\", \"sea_lion\"], \"151\": [\"n02085620\", \"Chihuahua\"], \"152\": [\"n02085782\", \"Japanese_spaniel\"], \"153\": [\"n02085936\", \"Maltese_dog\"], \"154\": [\"n02086079\", \"Pekinese\"], \"155\": [\"n02086240\", \"Shih-Tzu\"], \"156\": [\"n02086646\", \"Blenheim_spaniel\"], \"157\": [\"n02086910\", \"papillon\"], \"158\": [\"n02087046\", \"toy_terrier\"], \"159\": [\"n02087394\", \"Rhodesian_ridgeback\"], \"160\": [\"n02088094\", \"Afghan_hound\"], \"161\": [\"n02088238\", \"basset\"], \"162\": [\"n02088364\", \"beagle\"], \"163\": [\"n02088466\", \"bloodhound\"], \"164\": [\"n02088632\", \"bluetick\"], \"165\": [\"n02089078\", \"black-and-tan_coonhound\"], \"166\": [\"n02089867\", \"Walker_hound\"], \"167\": [\"n02089973\", \"English_foxhound\"], \"168\": [\"n02090379\", \"redbone\"], \"169\": [\"n02090622\", \"borzoi\"], \"170\": [\"n02090721\", \"Irish_wolfhound\"], \"171\": [\"n02091032\", \"Italian_greyhound\"], \"172\": [\"n02091134\", \"whippet\"], \"173\": [\"n02091244\", \"Ibizan_hound\"], \"174\": [\"n02091467\", \"Norwegian_elkhound\"], \"175\": [\"n02091635\", \"otterhound\"], \"176\": [\"n02091831\", \"Saluki\"], \"177\": [\"n02092002\", \"Scottish_deerhound\"], \"178\": [\"n02092339\", \"Weimaraner\"], \"179\": [\"n02093256\", \"Staffordshire_bullterrier\"], \"180\": [\"n02093428\", \"American_Staffordshire_terrier\"], \"181\": [\"n02093647\", \"Bedlington_terrier\"], \"182\": [\"n02093754\", \"Border_terrier\"], \"183\": [\"n02093859\", \"Kerry_blue_terrier\"], \"184\": [\"n02093991\", \"Irish_terrier\"], \"185\": [\"n02094114\", \"Norfolk_terrier\"], \"186\": [\"n02094258\", \"Norwich_terrier\"], \"187\": [\"n02094433\", \"Yorkshire_terrier\"], 
\"188\": [\"n02095314\", \"wire-haired_fox_terrier\"], \"189\": [\"n02095570\", \"Lakeland_terrier\"], \"190\": [\"n02095889\", \"Sealyham_terrier\"], \"191\": [\"n02096051\", \"Airedale\"], \"192\": [\"n02096177\", \"cairn\"], \"193\": [\"n02096294\", \"Australian_terrier\"], \"194\": [\"n02096437\", \"Dandie_Dinmont\"], \"195\": [\"n02096585\", \"Boston_bull\"], \"196\": [\"n02097047\", \"miniature_schnauzer\"], \"197\": [\"n02097130\", \"giant_schnauzer\"], \"198\": [\"n02097209\", \"standard_schnauzer\"], \"199\": [\"n02097298\", \"Scotch_terrier\"], \"200\": [\"n02097474\", \"Tibetan_terrier\"], \"201\": [\"n02097658\", \"silky_terrier\"], \"202\": [\"n02098105\", \"soft-coated_wheaten_terrier\"], \"203\": [\"n02098286\", \"West_Highland_white_terrier\"], \"204\": [\"n02098413\", \"Lhasa\"], \"205\": [\"n02099267\", \"flat-coated_retriever\"], \"206\": [\"n02099429\", \"curly-coated_retriever\"], \"207\": [\"n02099601\", \"golden_retriever\"], \"208\": [\"n02099712\", \"Labrador_retriever\"], \"209\": [\"n02099849\", \"Chesapeake_Bay_retriever\"], \"210\": [\"n02100236\", \"German_short-haired_pointer\"], \"211\": [\"n02100583\", \"vizsla\"], \"212\": [\"n02100735\", \"English_setter\"], \"213\": [\"n02100877\", \"Irish_setter\"], \"214\": [\"n02101006\", \"Gordon_setter\"], \"215\": [\"n02101388\", \"Brittany_spaniel\"], \"216\": [\"n02101556\", \"clumber\"], \"217\": [\"n02102040\", \"English_springer\"], \"218\": [\"n02102177\", \"Welsh_springer_spaniel\"], \"219\": [\"n02102318\", \"cocker_spaniel\"], \"220\": [\"n02102480\", \"Sussex_spaniel\"], \"221\": [\"n02102973\", \"Irish_water_spaniel\"], \"222\": [\"n02104029\", \"kuvasz\"], \"223\": [\"n02104365\", \"schipperke\"], \"224\": [\"n02105056\", \"groenendael\"], \"225\": [\"n02105162\", \"malinois\"], \"226\": [\"n02105251\", \"briard\"], \"227\": [\"n02105412\", \"kelpie\"], \"228\": [\"n02105505\", \"komondor\"], \"229\": [\"n02105641\", \"Old_English_sheepdog\"], \"230\": [\"n02105855\", 
\"Shetland_sheepdog\"], \"231\": [\"n02106030\", \"collie\"], \"232\": [\"n02106166\", \"Border_collie\"], \"233\": [\"n02106382\", \"Bouvier_des_Flandres\"], \"234\": [\"n02106550\", \"Rottweiler\"], \"235\": [\"n02106662\", \"German_shepherd\"], \"236\": [\"n02107142\", \"Doberman\"], \"237\": [\"n02107312\", \"miniature_pinscher\"], \"238\": [\"n02107574\", \"Greater_Swiss_Mountain_dog\"], \"239\": [\"n02107683\", \"Bernese_mountain_dog\"], \"240\": [\"n02107908\", \"Appenzeller\"], \"241\": [\"n02108000\", \"EntleBucher\"], \"242\": [\"n02108089\", \"boxer\"], \"243\": [\"n02108422\", \"bull_mastiff\"], \"244\": [\"n02108551\", \"Tibetan_mastiff\"], \"245\": [\"n02108915\", \"French_bulldog\"], \"246\": [\"n02109047\", \"Great_Dane\"], \"247\": [\"n02109525\", \"Saint_Bernard\"], \"248\": [\"n02109961\", \"Eskimo_dog\"], \"249\": [\"n02110063\", \"malamute\"], \"250\": [\"n02110185\", \"Siberian_husky\"], \"251\": [\"n02110341\", \"dalmatian\"], \"252\": [\"n02110627\", \"affenpinscher\"], \"253\": [\"n02110806\", \"basenji\"], \"254\": [\"n02110958\", \"pug\"], \"255\": [\"n02111129\", \"Leonberg\"], \"256\": [\"n02111277\", \"Newfoundland\"], \"257\": [\"n02111500\", \"Great_Pyrenees\"], \"258\": [\"n02111889\", \"Samoyed\"], \"259\": [\"n02112018\", \"Pomeranian\"], \"260\": [\"n02112137\", \"chow\"], \"261\": [\"n02112350\", \"keeshond\"], \"262\": [\"n02112706\", \"Brabancon_griffon\"], \"263\": [\"n02113023\", \"Pembroke\"], \"264\": [\"n02113186\", \"Cardigan\"], \"265\": [\"n02113624\", \"toy_poodle\"], \"266\": [\"n02113712\", \"miniature_poodle\"], \"267\": [\"n02113799\", \"standard_poodle\"], \"268\": [\"n02113978\", \"Mexican_hairless\"], \"269\": [\"n02114367\", \"timber_wolf\"], \"270\": [\"n02114548\", \"white_wolf\"], \"271\": [\"n02114712\", \"red_wolf\"], \"272\": [\"n02114855\", \"coyote\"], \"273\": [\"n02115641\", \"dingo\"], \"274\": [\"n02115913\", \"dhole\"], \"275\": [\"n02116738\", \"African_hunting_dog\"], \"276\": [\"n02117135\", 
\"hyena\"], \"277\": [\"n02119022\", \"red_fox\"], \"278\": [\"n02119789\", \"kit_fox\"], \"279\": [\"n02120079\", \"Arctic_fox\"], \"280\": [\"n02120505\", \"grey_fox\"], \"281\": [\"n02123045\", \"tabby\"], \"282\": [\"n02123159\", \"tiger_cat\"], \"283\": [\"n02123394\", \"Persian_cat\"], \"284\": [\"n02123597\", \"Siamese_cat\"], \"285\": [\"n02124075\", \"Egyptian_cat\"], \"286\": [\"n02125311\", \"cougar\"], \"287\": [\"n02127052\", \"lynx\"], \"288\": [\"n02128385\", \"leopard\"], \"289\": [\"n02128757\", \"snow_leopard\"], \"290\": [\"n02128925\", \"jaguar\"], \"291\": [\"n02129165\", \"lion\"], \"292\": [\"n02129604\", \"tiger\"], \"293\": [\"n02130308\", \"cheetah\"], \"294\": [\"n02132136\", \"brown_bear\"], \"295\": [\"n02133161\", \"American_black_bear\"], \"296\": [\"n02134084\", \"ice_bear\"], \"297\": [\"n02134418\", \"sloth_bear\"], \"298\": [\"n02137549\", \"mongoose\"], \"299\": [\"n02138441\", \"meerkat\"], \"300\": [\"n02165105\", \"tiger_beetle\"], \"301\": [\"n02165456\", \"ladybug\"], \"302\": [\"n02167151\", \"ground_beetle\"], \"303\": [\"n02168699\", \"long-horned_beetle\"], \"304\": [\"n02169497\", \"leaf_beetle\"], \"305\": [\"n02172182\", \"dung_beetle\"], \"306\": [\"n02174001\", \"rhinoceros_beetle\"], \"307\": [\"n02177972\", \"weevil\"], \"308\": [\"n02190166\", \"fly\"], \"309\": [\"n02206856\", \"bee\"], \"310\": [\"n02219486\", \"ant\"], \"311\": [\"n02226429\", \"grasshopper\"], \"312\": [\"n02229544\", \"cricket\"], \"313\": [\"n02231487\", \"walking_stick\"], \"314\": [\"n02233338\", \"cockroach\"], \"315\": [\"n02236044\", \"mantis\"], \"316\": [\"n02256656\", \"cicada\"], \"317\": [\"n02259212\", \"leafhopper\"], \"318\": [\"n02264363\", \"lacewing\"], \"319\": [\"n02268443\", \"dragonfly\"], \"320\": [\"n02268853\", \"damselfly\"], \"321\": [\"n02276258\", \"admiral\"], \"322\": [\"n02277742\", \"ringlet\"], \"323\": [\"n02279972\", \"monarch\"], \"324\": [\"n02280649\", \"cabbage_butterfly\"], \"325\": [\"n02281406\", 
\"sulphur_butterfly\"], \"326\": [\"n02281787\", \"lycaenid\"], \"327\": [\"n02317335\", \"starfish\"], \"328\": [\"n02319095\", \"sea_urchin\"], \"329\": [\"n02321529\", \"sea_cucumber\"], \"330\": [\"n02325366\", \"wood_rabbit\"], \"331\": [\"n02326432\", \"hare\"], \"332\": [\"n02328150\", \"Angora\"], \"333\": [\"n02342885\", \"hamster\"], \"334\": [\"n02346627\", \"porcupine\"], \"335\": [\"n02356798\", \"fox_squirrel\"], \"336\": [\"n02361337\", \"marmot\"], \"337\": [\"n02363005\", \"beaver\"], \"338\": [\"n02364673\", \"guinea_pig\"], \"339\": [\"n02389026\", \"sorrel\"], \"340\": [\"n02391049\", \"zebra\"], \"341\": [\"n02395406\", \"hog\"], \"342\": [\"n02396427\", \"wild_boar\"], \"343\": [\"n02397096\", \"warthog\"], \"344\": [\"n02398521\", \"hippopotamus\"], \"345\": [\"n02403003\", \"ox\"], \"346\": [\"n02408429\", \"water_buffalo\"], \"347\": [\"n02410509\", \"bison\"], \"348\": [\"n02412080\", \"ram\"], \"349\": [\"n02415577\", \"bighorn\"], \"350\": [\"n02417914\", \"ibex\"], \"351\": [\"n02422106\", \"hartebeest\"], \"352\": [\"n02422699\", \"impala\"], \"353\": [\"n02423022\", \"gazelle\"], \"354\": [\"n02437312\", \"Arabian_camel\"], \"355\": [\"n02437616\", \"llama\"], \"356\": [\"n02441942\", \"weasel\"], \"357\": [\"n02442845\", \"mink\"], \"358\": [\"n02443114\", \"polecat\"], \"359\": [\"n02443484\", \"black-footed_ferret\"], \"360\": [\"n02444819\", \"otter\"], \"361\": [\"n02445715\", \"skunk\"], \"362\": [\"n02447366\", \"badger\"], \"363\": [\"n02454379\", \"armadillo\"], \"364\": [\"n02457408\", \"three-toed_sloth\"], \"365\": [\"n02480495\", \"orangutan\"], \"366\": [\"n02480855\", \"gorilla\"], \"367\": [\"n02481823\", \"chimpanzee\"], \"368\": [\"n02483362\", \"gibbon\"], \"369\": [\"n02483708\", \"siamang\"], \"370\": [\"n02484975\", \"guenon\"], \"371\": [\"n02486261\", \"patas\"], \"372\": [\"n02486410\", \"baboon\"], \"373\": [\"n02487347\", \"macaque\"], \"374\": [\"n02488291\", \"langur\"], \"375\": [\"n02488702\", 
\"colobus\"], \"376\": [\"n02489166\", \"proboscis_monkey\"], \"377\": [\"n02490219\", \"marmoset\"], \"378\": [\"n02492035\", \"capuchin\"], \"379\": [\"n02492660\", \"howler_monkey\"], \"380\": [\"n02493509\", \"titi\"], \"381\": [\"n02493793\", \"spider_monkey\"], \"382\": [\"n02494079\", \"squirrel_monkey\"], \"383\": [\"n02497673\", \"Madagascar_cat\"], \"384\": [\"n02500267\", \"indri\"], \"385\": [\"n02504013\", \"Indian_elephant\"], \"386\": [\"n02504458\", \"African_elephant\"], \"387\": [\"n02509815\", \"lesser_panda\"], \"388\": [\"n02510455\", \"giant_panda\"], \"389\": [\"n02514041\", \"barracouta\"], \"390\": [\"n02526121\", \"eel\"], \"391\": [\"n02536864\", \"coho\"], \"392\": [\"n02606052\", \"rock_beauty\"], \"393\": [\"n02607072\", \"anemone_fish\"], \"394\": [\"n02640242\", \"sturgeon\"], \"395\": [\"n02641379\", \"gar\"], \"396\": [\"n02643566\", \"lionfish\"], \"397\": [\"n02655020\", \"puffer\"], \"398\": [\"n02666196\", \"abacus\"], \"399\": [\"n02667093\", \"abaya\"], \"400\": [\"n02669723\", \"academic_gown\"], \"401\": [\"n02672831\", \"accordion\"], \"402\": [\"n02676566\", \"acoustic_guitar\"], \"403\": [\"n02687172\", \"aircraft_carrier\"], \"404\": [\"n02690373\", \"airliner\"], \"405\": [\"n02692877\", \"airship\"], \"406\": [\"n02699494\", \"altar\"], \"407\": [\"n02701002\", \"ambulance\"], \"408\": [\"n02704792\", \"amphibian\"], \"409\": [\"n02708093\", \"analog_clock\"], \"410\": [\"n02727426\", \"apiary\"], \"411\": [\"n02730930\", \"apron\"], \"412\": [\"n02747177\", \"ashcan\"], \"413\": [\"n02749479\", \"assault_rifle\"], \"414\": [\"n02769748\", \"backpack\"], \"415\": [\"n02776631\", \"bakery\"], \"416\": [\"n02777292\", \"balance_beam\"], \"417\": [\"n02782093\", \"balloon\"], \"418\": [\"n02783161\", \"ballpoint\"], \"419\": [\"n02786058\", \"Band_Aid\"], \"420\": [\"n02787622\", \"banjo\"], \"421\": [\"n02788148\", \"bannister\"], \"422\": [\"n02790996\", \"barbell\"], \"423\": [\"n02791124\", \"barber_chair\"], 
\"424\": [\"n02791270\", \"barbershop\"], \"425\": [\"n02793495\", \"barn\"], \"426\": [\"n02794156\", \"barometer\"], \"427\": [\"n02795169\", \"barrel\"], \"428\": [\"n02797295\", \"barrow\"], \"429\": [\"n02799071\", \"baseball\"], \"430\": [\"n02802426\", \"basketball\"], \"431\": [\"n02804414\", \"bassinet\"], \"432\": [\"n02804610\", \"bassoon\"], \"433\": [\"n02807133\", \"bathing_cap\"], \"434\": [\"n02808304\", \"bath_towel\"], \"435\": [\"n02808440\", \"bathtub\"], \"436\": [\"n02814533\", \"beach_wagon\"], \"437\": [\"n02814860\", \"beacon\"], \"438\": [\"n02815834\", \"beaker\"], \"439\": [\"n02817516\", \"bearskin\"], \"440\": [\"n02823428\", \"beer_bottle\"], \"441\": [\"n02823750\", \"beer_glass\"], \"442\": [\"n02825657\", \"bell_cote\"], \"443\": [\"n02834397\", \"bib\"], \"444\": [\"n02835271\", \"bicycle-built-for-two\"], \"445\": [\"n02837789\", \"bikini\"], \"446\": [\"n02840245\", \"binder\"], \"447\": [\"n02841315\", \"binoculars\"], \"448\": [\"n02843684\", \"birdhouse\"], \"449\": [\"n02859443\", \"boathouse\"], \"450\": [\"n02860847\", \"bobsled\"], \"451\": [\"n02865351\", \"bolo_tie\"], \"452\": [\"n02869837\", \"bonnet\"], \"453\": [\"n02870880\", \"bookcase\"], \"454\": [\"n02871525\", \"bookshop\"], \"455\": [\"n02877765\", \"bottlecap\"], \"456\": [\"n02879718\", \"bow\"], \"457\": [\"n02883205\", \"bow_tie\"], \"458\": [\"n02892201\", \"brass\"], \"459\": [\"n02892767\", \"brassiere\"], \"460\": [\"n02894605\", \"breakwater\"], \"461\": [\"n02895154\", \"breastplate\"], \"462\": [\"n02906734\", \"broom\"], \"463\": [\"n02909870\", \"bucket\"], \"464\": [\"n02910353\", \"buckle\"], \"465\": [\"n02916936\", \"bulletproof_vest\"], \"466\": [\"n02917067\", \"bullet_train\"], \"467\": [\"n02927161\", \"butcher_shop\"], \"468\": [\"n02930766\", \"cab\"], \"469\": [\"n02939185\", \"caldron\"], \"470\": [\"n02948072\", \"candle\"], \"471\": [\"n02950826\", \"cannon\"], \"472\": [\"n02951358\", \"canoe\"], \"473\": [\"n02951585\", 
\"can_opener\"], \"474\": [\"n02963159\", \"cardigan\"], \"475\": [\"n02965783\", \"car_mirror\"], \"476\": [\"n02966193\", \"carousel\"], \"477\": [\"n02966687\", \"carpenter's_kit\"], \"478\": [\"n02971356\", \"carton\"], \"479\": [\"n02974003\", \"car_wheel\"], \"480\": [\"n02977058\", \"cash_machine\"], \"481\": [\"n02978881\", \"cassette\"], \"482\": [\"n02979186\", \"cassette_player\"], \"483\": [\"n02980441\", \"castle\"], \"484\": [\"n02981792\", \"catamaran\"], \"485\": [\"n02988304\", \"CD_player\"], \"486\": [\"n02992211\", \"cello\"], \"487\": [\"n02992529\", \"cellular_telephone\"], \"488\": [\"n02999410\", \"chain\"], \"489\": [\"n03000134\", \"chainlink_fence\"], \"490\": [\"n03000247\", \"chain_mail\"], \"491\": [\"n03000684\", \"chain_saw\"], \"492\": [\"n03014705\", \"chest\"], \"493\": [\"n03016953\", \"chiffonier\"], \"494\": [\"n03017168\", \"chime\"], \"495\": [\"n03018349\", \"china_cabinet\"], \"496\": [\"n03026506\", \"Christmas_stocking\"], \"497\": [\"n03028079\", \"church\"], \"498\": [\"n03032252\", \"cinema\"], \"499\": [\"n03041632\", \"cleaver\"], \"500\": [\"n03042490\", \"cliff_dwelling\"], \"501\": [\"n03045698\", \"cloak\"], \"502\": [\"n03047690\", \"clog\"], \"503\": [\"n03062245\", \"cocktail_shaker\"], \"504\": [\"n03063599\", \"coffee_mug\"], \"505\": [\"n03063689\", \"coffeepot\"], \"506\": [\"n03065424\", \"coil\"], \"507\": [\"n03075370\", \"combination_lock\"], \"508\": [\"n03085013\", \"computer_keyboard\"], \"509\": [\"n03089624\", \"confectionery\"], \"510\": [\"n03095699\", \"container_ship\"], \"511\": [\"n03100240\", \"convertible\"], \"512\": [\"n03109150\", \"corkscrew\"], \"513\": [\"n03110669\", \"cornet\"], \"514\": [\"n03124043\", \"cowboy_boot\"], \"515\": [\"n03124170\", \"cowboy_hat\"], \"516\": [\"n03125729\", \"cradle\"], \"517\": [\"n03126707\", \"crane\"], \"518\": [\"n03127747\", \"crash_helmet\"], \"519\": [\"n03127925\", \"crate\"], \"520\": [\"n03131574\", \"crib\"], \"521\": [\"n03133878\", 
\"Crock_Pot\"], \"522\": [\"n03134739\", \"croquet_ball\"], \"523\": [\"n03141823\", \"crutch\"], \"524\": [\"n03146219\", \"cuirass\"], \"525\": [\"n03160309\", \"dam\"], \"526\": [\"n03179701\", \"desk\"], \"527\": [\"n03180011\", \"desktop_computer\"], \"528\": [\"n03187595\", \"dial_telephone\"], \"529\": [\"n03188531\", \"diaper\"], \"530\": [\"n03196217\", \"digital_clock\"], \"531\": [\"n03197337\", \"digital_watch\"], \"532\": [\"n03201208\", \"dining_table\"], \"533\": [\"n03207743\", \"dishrag\"], \"534\": [\"n03207941\", \"dishwasher\"], \"535\": [\"n03208938\", \"disk_brake\"], \"536\": [\"n03216828\", \"dock\"], \"537\": [\"n03218198\", \"dogsled\"], \"538\": [\"n03220513\", \"dome\"], \"539\": [\"n03223299\", \"doormat\"], \"540\": [\"n03240683\", \"drilling_platform\"], \"541\": [\"n03249569\", \"drum\"], \"542\": [\"n03250847\", \"drumstick\"], \"543\": [\"n03255030\", \"dumbbell\"], \"544\": [\"n03259280\", \"Dutch_oven\"], \"545\": [\"n03271574\", \"electric_fan\"], \"546\": [\"n03272010\", \"electric_guitar\"], \"547\": [\"n03272562\", \"electric_locomotive\"], \"548\": [\"n03290653\", \"entertainment_center\"], \"549\": [\"n03291819\", \"envelope\"], \"550\": [\"n03297495\", \"espresso_maker\"], \"551\": [\"n03314780\", \"face_powder\"], \"552\": [\"n03325584\", \"feather_boa\"], \"553\": [\"n03337140\", \"file\"], \"554\": [\"n03344393\", \"fireboat\"], \"555\": [\"n03345487\", \"fire_engine\"], \"556\": [\"n03347037\", \"fire_screen\"], \"557\": [\"n03355925\", \"flagpole\"], \"558\": [\"n03372029\", \"flute\"], \"559\": [\"n03376595\", \"folding_chair\"], \"560\": [\"n03379051\", \"football_helmet\"], \"561\": [\"n03384352\", \"forklift\"], \"562\": [\"n03388043\", \"fountain\"], \"563\": [\"n03388183\", \"fountain_pen\"], \"564\": [\"n03388549\", \"four-poster\"], \"565\": [\"n03393912\", \"freight_car\"], \"566\": [\"n03394916\", \"French_horn\"], \"567\": [\"n03400231\", \"frying_pan\"], \"568\": [\"n03404251\", \"fur_coat\"], \"569\": 
[\"n03417042\", \"garbage_truck\"], \"570\": [\"n03424325\", \"gasmask\"], \"571\": [\"n03425413\", \"gas_pump\"], \"572\": [\"n03443371\", \"goblet\"], \"573\": [\"n03444034\", \"go-kart\"], \"574\": [\"n03445777\", \"golf_ball\"], \"575\": [\"n03445924\", \"golfcart\"], \"576\": [\"n03447447\", \"gondola\"], \"577\": [\"n03447721\", \"gong\"], \"578\": [\"n03450230\", \"gown\"], \"579\": [\"n03452741\", \"grand_piano\"], \"580\": [\"n03457902\", \"greenhouse\"], \"581\": [\"n03459775\", \"grille\"], \"582\": [\"n03461385\", \"grocery_store\"], \"583\": [\"n03467068\", \"guillotine\"], \"584\": [\"n03476684\", \"hair_slide\"], \"585\": [\"n03476991\", \"hair_spray\"], \"586\": [\"n03478589\", \"half_track\"], \"587\": [\"n03481172\", \"hammer\"], \"588\": [\"n03482405\", \"hamper\"], \"589\": [\"n03483316\", \"hand_blower\"], \"590\": [\"n03485407\", \"hand-held_computer\"], \"591\": [\"n03485794\", \"handkerchief\"], \"592\": [\"n03492542\", \"hard_disc\"], \"593\": [\"n03494278\", \"harmonica\"], \"594\": [\"n03495258\", \"harp\"], \"595\": [\"n03496892\", \"harvester\"], \"596\": [\"n03498962\", \"hatchet\"], \"597\": [\"n03527444\", \"holster\"], \"598\": [\"n03529860\", \"home_theater\"], \"599\": [\"n03530642\", \"honeycomb\"], \"600\": [\"n03532672\", \"hook\"], \"601\": [\"n03534580\", \"hoopskirt\"], \"602\": [\"n03535780\", \"horizontal_bar\"], \"603\": [\"n03538406\", \"horse_cart\"], \"604\": [\"n03544143\", \"hourglass\"], \"605\": [\"n03584254\", \"iPod\"], \"606\": [\"n03584829\", \"iron\"], \"607\": [\"n03590841\", \"jack-o'-lantern\"], \"608\": [\"n03594734\", \"jean\"], \"609\": [\"n03594945\", \"jeep\"], \"610\": [\"n03595614\", \"jersey\"], \"611\": [\"n03598930\", \"jigsaw_puzzle\"], \"612\": [\"n03599486\", \"jinrikisha\"], \"613\": [\"n03602883\", \"joystick\"], \"614\": [\"n03617480\", \"kimono\"], \"615\": [\"n03623198\", \"knee_pad\"], \"616\": [\"n03627232\", \"knot\"], \"617\": [\"n03630383\", \"lab_coat\"], \"618\": [\"n03633091\", 
\"ladle\"], \"619\": [\"n03637318\", \"lampshade\"], \"620\": [\"n03642806\", \"laptop\"], \"621\": [\"n03649909\", \"lawn_mower\"], \"622\": [\"n03657121\", \"lens_cap\"], \"623\": [\"n03658185\", \"letter_opener\"], \"624\": [\"n03661043\", \"library\"], \"625\": [\"n03662601\", \"lifeboat\"], \"626\": [\"n03666591\", \"lighter\"], \"627\": [\"n03670208\", \"limousine\"], \"628\": [\"n03673027\", \"liner\"], \"629\": [\"n03676483\", \"lipstick\"], \"630\": [\"n03680355\", \"Loafer\"], \"631\": [\"n03690938\", \"lotion\"], \"632\": [\"n03691459\", \"loudspeaker\"], \"633\": [\"n03692522\", \"loupe\"], \"634\": [\"n03697007\", \"lumbermill\"], \"635\": [\"n03706229\", \"magnetic_compass\"], \"636\": [\"n03709823\", \"mailbag\"], \"637\": [\"n03710193\", \"mailbox\"], \"638\": [\"n03710637\", \"maillot\"], \"639\": [\"n03710721\", \"maillot\"], \"640\": [\"n03717622\", \"manhole_cover\"], \"641\": [\"n03720891\", \"maraca\"], \"642\": [\"n03721384\", \"marimba\"], \"643\": [\"n03724870\", \"mask\"], \"644\": [\"n03729826\", \"matchstick\"], \"645\": [\"n03733131\", \"maypole\"], \"646\": [\"n03733281\", \"maze\"], \"647\": [\"n03733805\", \"measuring_cup\"], \"648\": [\"n03742115\", \"medicine_chest\"], \"649\": [\"n03743016\", \"megalith\"], \"650\": [\"n03759954\", \"microphone\"], \"651\": [\"n03761084\", \"microwave\"], \"652\": [\"n03763968\", \"military_uniform\"], \"653\": [\"n03764736\", \"milk_can\"], \"654\": [\"n03769881\", \"minibus\"], \"655\": [\"n03770439\", \"miniskirt\"], \"656\": [\"n03770679\", \"minivan\"], \"657\": [\"n03773504\", \"missile\"], \"658\": [\"n03775071\", \"mitten\"], \"659\": [\"n03775546\", \"mixing_bowl\"], \"660\": [\"n03776460\", \"mobile_home\"], \"661\": [\"n03777568\", \"Model_T\"], \"662\": [\"n03777754\", \"modem\"], \"663\": [\"n03781244\", \"monastery\"], \"664\": [\"n03782006\", \"monitor\"], \"665\": [\"n03785016\", \"moped\"], \"666\": [\"n03786901\", \"mortar\"], \"667\": [\"n03787032\", \"mortarboard\"], \"668\": 
[\"n03788195\", \"mosque\"], \"669\": [\"n03788365\", \"mosquito_net\"], \"670\": [\"n03791053\", \"motor_scooter\"], \"671\": [\"n03792782\", \"mountain_bike\"], \"672\": [\"n03792972\", \"mountain_tent\"], \"673\": [\"n03793489\", \"mouse\"], \"674\": [\"n03794056\", \"mousetrap\"], \"675\": [\"n03796401\", \"moving_van\"], \"676\": [\"n03803284\", \"muzzle\"], \"677\": [\"n03804744\", \"nail\"], \"678\": [\"n03814639\", \"neck_brace\"], \"679\": [\"n03814906\", \"necklace\"], \"680\": [\"n03825788\", \"nipple\"], \"681\": [\"n03832673\", \"notebook\"], \"682\": [\"n03837869\", \"obelisk\"], \"683\": [\"n03838899\", \"oboe\"], \"684\": [\"n03840681\", \"ocarina\"], \"685\": [\"n03841143\", \"odometer\"], \"686\": [\"n03843555\", \"oil_filter\"], \"687\": [\"n03854065\", \"organ\"], \"688\": [\"n03857828\", \"oscilloscope\"], \"689\": [\"n03866082\", \"overskirt\"], \"690\": [\"n03868242\", \"oxcart\"], \"691\": [\"n03868863\", \"oxygen_mask\"], \"692\": [\"n03871628\", \"packet\"], \"693\": [\"n03873416\", \"paddle\"], \"694\": [\"n03874293\", \"paddlewheel\"], \"695\": [\"n03874599\", \"padlock\"], \"696\": [\"n03876231\", \"paintbrush\"], \"697\": [\"n03877472\", \"pajama\"], \"698\": [\"n03877845\", \"palace\"], \"699\": [\"n03884397\", \"panpipe\"], \"700\": [\"n03887697\", \"paper_towel\"], \"701\": [\"n03888257\", \"parachute\"], \"702\": [\"n03888605\", \"parallel_bars\"], \"703\": [\"n03891251\", \"park_bench\"], \"704\": [\"n03891332\", \"parking_meter\"], \"705\": [\"n03895866\", \"passenger_car\"], \"706\": [\"n03899768\", \"patio\"], \"707\": [\"n03902125\", \"pay-phone\"], \"708\": [\"n03903868\", \"pedestal\"], \"709\": [\"n03908618\", \"pencil_box\"], \"710\": [\"n03908714\", \"pencil_sharpener\"], \"711\": [\"n03916031\", \"perfume\"], \"712\": [\"n03920288\", \"Petri_dish\"], \"713\": [\"n03924679\", \"photocopier\"], \"714\": [\"n03929660\", \"pick\"], \"715\": [\"n03929855\", \"pickelhaube\"], \"716\": [\"n03930313\", \"picket_fence\"], 
\"717\": [\"n03930630\", \"pickup\"], \"718\": [\"n03933933\", \"pier\"], \"719\": [\"n03935335\", \"piggy_bank\"], \"720\": [\"n03937543\", \"pill_bottle\"], \"721\": [\"n03938244\", \"pillow\"], \"722\": [\"n03942813\", \"ping-pong_ball\"], \"723\": [\"n03944341\", \"pinwheel\"], \"724\": [\"n03947888\", \"pirate\"], \"725\": [\"n03950228\", \"pitcher\"], \"726\": [\"n03954731\", \"plane\"], \"727\": [\"n03956157\", \"planetarium\"], \"728\": [\"n03958227\", \"plastic_bag\"], \"729\": [\"n03961711\", \"plate_rack\"], \"730\": [\"n03967562\", \"plow\"], \"731\": [\"n03970156\", \"plunger\"], \"732\": [\"n03976467\", \"Polaroid_camera\"], \"733\": [\"n03976657\", \"pole\"], \"734\": [\"n03977966\", \"police_van\"], \"735\": [\"n03980874\", \"poncho\"], \"736\": [\"n03982430\", \"pool_table\"], \"737\": [\"n03983396\", \"pop_bottle\"], \"738\": [\"n03991062\", \"pot\"], \"739\": [\"n03992509\", \"potter's_wheel\"], \"740\": [\"n03995372\", \"power_drill\"], \"741\": [\"n03998194\", \"prayer_rug\"], \"742\": [\"n04004767\", \"printer\"], \"743\": [\"n04005630\", \"prison\"], \"744\": [\"n04008634\", \"projectile\"], \"745\": [\"n04009552\", \"projector\"], \"746\": [\"n04019541\", \"puck\"], \"747\": [\"n04023962\", \"punching_bag\"], \"748\": [\"n04026417\", \"purse\"], \"749\": [\"n04033901\", \"quill\"], \"750\": [\"n04033995\", \"quilt\"], \"751\": [\"n04037443\", \"racer\"], \"752\": [\"n04039381\", \"racket\"], \"753\": [\"n04040759\", \"radiator\"], \"754\": [\"n04041544\", \"radio\"], \"755\": [\"n04044716\", \"radio_telescope\"], \"756\": [\"n04049303\", \"rain_barrel\"], \"757\": [\"n04065272\", \"recreational_vehicle\"], \"758\": [\"n04067472\", \"reel\"], \"759\": [\"n04069434\", \"reflex_camera\"], \"760\": [\"n04070727\", \"refrigerator\"], \"761\": [\"n04074963\", \"remote_control\"], \"762\": [\"n04081281\", \"restaurant\"], \"763\": [\"n04086273\", \"revolver\"], \"764\": [\"n04090263\", \"rifle\"], \"765\": [\"n04099969\", \"rocking_chair\"], 
\"766\": [\"n04111531\", \"rotisserie\"], \"767\": [\"n04116512\", \"rubber_eraser\"], \"768\": [\"n04118538\", \"rugby_ball\"], \"769\": [\"n04118776\", \"rule\"], \"770\": [\"n04120489\", \"running_shoe\"], \"771\": [\"n04125021\", \"safe\"], \"772\": [\"n04127249\", \"safety_pin\"], \"773\": [\"n04131690\", \"saltshaker\"], \"774\": [\"n04133789\", \"sandal\"], \"775\": [\"n04136333\", \"sarong\"], \"776\": [\"n04141076\", \"sax\"], \"777\": [\"n04141327\", \"scabbard\"], \"778\": [\"n04141975\", \"scale\"], \"779\": [\"n04146614\", \"school_bus\"], \"780\": [\"n04147183\", \"schooner\"], \"781\": [\"n04149813\", \"scoreboard\"], \"782\": [\"n04152593\", \"screen\"], \"783\": [\"n04153751\", \"screw\"], \"784\": [\"n04154565\", \"screwdriver\"], \"785\": [\"n04162706\", \"seat_belt\"], \"786\": [\"n04179913\", \"sewing_machine\"], \"787\": [\"n04192698\", \"shield\"], \"788\": [\"n04200800\", \"shoe_shop\"], \"789\": [\"n04201297\", \"shoji\"], \"790\": [\"n04204238\", \"shopping_basket\"], \"791\": [\"n04204347\", \"shopping_cart\"], \"792\": [\"n04208210\", \"shovel\"], \"793\": [\"n04209133\", \"shower_cap\"], \"794\": [\"n04209239\", \"shower_curtain\"], \"795\": [\"n04228054\", \"ski\"], \"796\": [\"n04229816\", \"ski_mask\"], \"797\": [\"n04235860\", \"sleeping_bag\"], \"798\": [\"n04238763\", \"slide_rule\"], \"799\": [\"n04239074\", \"sliding_door\"], \"800\": [\"n04243546\", \"slot\"], \"801\": [\"n04251144\", \"snorkel\"], \"802\": [\"n04252077\", \"snowmobile\"], \"803\": [\"n04252225\", \"snowplow\"], \"804\": [\"n04254120\", \"soap_dispenser\"], \"805\": [\"n04254680\", \"soccer_ball\"], \"806\": [\"n04254777\", \"sock\"], \"807\": [\"n04258138\", \"solar_dish\"], \"808\": [\"n04259630\", \"sombrero\"], \"809\": [\"n04263257\", \"soup_bowl\"], \"810\": [\"n04264628\", \"space_bar\"], \"811\": [\"n04265275\", \"space_heater\"], \"812\": [\"n04266014\", \"space_shuttle\"], \"813\": [\"n04270147\", \"spatula\"], \"814\": [\"n04273569\", \"speedboat\"], 
\"815\": [\"n04275548\", \"spider_web\"], \"816\": [\"n04277352\", \"spindle\"], \"817\": [\"n04285008\", \"sports_car\"], \"818\": [\"n04286575\", \"spotlight\"], \"819\": [\"n04296562\", \"stage\"], \"820\": [\"n04310018\", \"steam_locomotive\"], \"821\": [\"n04311004\", \"steel_arch_bridge\"], \"822\": [\"n04311174\", \"steel_drum\"], \"823\": [\"n04317175\", \"stethoscope\"], \"824\": [\"n04325704\", \"stole\"], \"825\": [\"n04326547\", \"stone_wall\"], \"826\": [\"n04328186\", \"stopwatch\"], \"827\": [\"n04330267\", \"stove\"], \"828\": [\"n04332243\", \"strainer\"], \"829\": [\"n04335435\", \"streetcar\"], \"830\": [\"n04336792\", \"stretcher\"], \"831\": [\"n04344873\", \"studio_couch\"], \"832\": [\"n04346328\", \"stupa\"], \"833\": [\"n04347754\", \"submarine\"], \"834\": [\"n04350905\", \"suit\"], \"835\": [\"n04355338\", \"sundial\"], \"836\": [\"n04355933\", \"sunglass\"], \"837\": [\"n04356056\", \"sunglasses\"], \"838\": [\"n04357314\", \"sunscreen\"], \"839\": [\"n04366367\", \"suspension_bridge\"], \"840\": [\"n04367480\", \"swab\"], \"841\": [\"n04370456\", \"sweatshirt\"], \"842\": [\"n04371430\", \"swimming_trunks\"], \"843\": [\"n04371774\", \"swing\"], \"844\": [\"n04372370\", \"switch\"], \"845\": [\"n04376876\", \"syringe\"], \"846\": [\"n04380533\", \"table_lamp\"], \"847\": [\"n04389033\", \"tank\"], \"848\": [\"n04392985\", \"tape_player\"], \"849\": [\"n04398044\", \"teapot\"], \"850\": [\"n04399382\", \"teddy\"], \"851\": [\"n04404412\", \"television\"], \"852\": [\"n04409515\", \"tennis_ball\"], \"853\": [\"n04417672\", \"thatch\"], \"854\": [\"n04418357\", \"theater_curtain\"], \"855\": [\"n04423845\", \"thimble\"], \"856\": [\"n04428191\", \"thresher\"], \"857\": [\"n04429376\", \"throne\"], \"858\": [\"n04435653\", \"tile_roof\"], \"859\": [\"n04442312\", \"toaster\"], \"860\": [\"n04443257\", \"tobacco_shop\"], \"861\": [\"n04447861\", \"toilet_seat\"], \"862\": [\"n04456115\", \"torch\"], \"863\": [\"n04458633\", \"totem_pole\"], 
\"864\": [\"n04461696\", \"tow_truck\"], \"865\": [\"n04462240\", \"toyshop\"], \"866\": [\"n04465501\", \"tractor\"], \"867\": [\"n04467665\", \"trailer_truck\"], \"868\": [\"n04476259\", \"tray\"], \"869\": [\"n04479046\", \"trench_coat\"], \"870\": [\"n04482393\", \"tricycle\"], \"871\": [\"n04483307\", \"trimaran\"], \"872\": [\"n04485082\", \"tripod\"], \"873\": [\"n04486054\", \"triumphal_arch\"], \"874\": [\"n04487081\", \"trolleybus\"], \"875\": [\"n04487394\", \"trombone\"], \"876\": [\"n04493381\", \"tub\"], \"877\": [\"n04501370\", \"turnstile\"], \"878\": [\"n04505470\", \"typewriter_keyboard\"], \"879\": [\"n04507155\", \"umbrella\"], \"880\": [\"n04509417\", \"unicycle\"], \"881\": [\"n04515003\", \"upright\"], \"882\": [\"n04517823\", \"vacuum\"], \"883\": [\"n04522168\", \"vase\"], \"884\": [\"n04523525\", \"vault\"], \"885\": [\"n04525038\", \"velvet\"], \"886\": [\"n04525305\", \"vending_machine\"], \"887\": [\"n04532106\", \"vestment\"], \"888\": [\"n04532670\", \"viaduct\"], \"889\": [\"n04536866\", \"violin\"], \"890\": [\"n04540053\", \"volleyball\"], \"891\": [\"n04542943\", \"waffle_iron\"], \"892\": [\"n04548280\", \"wall_clock\"], \"893\": [\"n04548362\", \"wallet\"], \"894\": [\"n04550184\", \"wardrobe\"], \"895\": [\"n04552348\", \"warplane\"], \"896\": [\"n04553703\", \"washbasin\"], \"897\": [\"n04554684\", \"washer\"], \"898\": [\"n04557648\", \"water_bottle\"], \"899\": [\"n04560804\", \"water_jug\"], \"900\": [\"n04562935\", \"water_tower\"], \"901\": [\"n04579145\", \"whiskey_jug\"], \"902\": [\"n04579432\", \"whistle\"], \"903\": [\"n04584207\", \"wig\"], \"904\": [\"n04589890\", \"window_screen\"], \"905\": [\"n04590129\", \"window_shade\"], \"906\": [\"n04591157\", \"Windsor_tie\"], \"907\": [\"n04591713\", \"wine_bottle\"], \"908\": [\"n04592741\", \"wing\"], \"909\": [\"n04596742\", \"wok\"], \"910\": [\"n04597913\", \"wooden_spoon\"], \"911\": [\"n04599235\", \"wool\"], \"912\": [\"n04604644\", \"worm_fence\"], \"913\": 
[\"n04606251\", \"wreck\"], \"914\": [\"n04612504\", \"yawl\"], \"915\": [\"n04613696\", \"yurt\"], \"916\": [\"n06359193\", \"web_site\"], \"917\": [\"n06596364\", \"comic_book\"], \"918\": [\"n06785654\", \"crossword_puzzle\"], \"919\": [\"n06794110\", \"street_sign\"], \"920\": [\"n06874185\", \"traffic_light\"], \"921\": [\"n07248320\", \"book_jacket\"], \"922\": [\"n07565083\", \"menu\"], \"923\": [\"n07579787\", \"plate\"], \"924\": [\"n07583066\", \"guacamole\"], \"925\": [\"n07584110\", \"consomme\"], \"926\": [\"n07590611\", \"hot_pot\"], \"927\": [\"n07613480\", \"trifle\"], \"928\": [\"n07614500\", \"ice_cream\"], \"929\": [\"n07615774\", \"ice_lolly\"], \"930\": [\"n07684084\", \"French_loaf\"], \"931\": [\"n07693725\", \"bagel\"], \"932\": [\"n07695742\", \"pretzel\"], \"933\": [\"n07697313\", \"cheeseburger\"], \"934\": [\"n07697537\", \"hotdog\"], \"935\": [\"n07711569\", \"mashed_potato\"], \"936\": [\"n07714571\", \"head_cabbage\"], \"937\": [\"n07714990\", \"broccoli\"], \"938\": [\"n07715103\", \"cauliflower\"], \"939\": [\"n07716358\", \"zucchini\"], \"940\": [\"n07716906\", \"spaghetti_squash\"], \"941\": [\"n07717410\", \"acorn_squash\"], \"942\": [\"n07717556\", \"butternut_squash\"], \"943\": [\"n07718472\", \"cucumber\"], \"944\": [\"n07718747\", \"artichoke\"], \"945\": [\"n07720875\", \"bell_pepper\"], \"946\": [\"n07730033\", \"cardoon\"], \"947\": [\"n07734744\", \"mushroom\"], \"948\": [\"n07742313\", \"Granny_Smith\"], \"949\": [\"n07745940\", \"strawberry\"], \"950\": [\"n07747607\", \"orange\"], \"951\": [\"n07749582\", \"lemon\"], \"952\": [\"n07753113\", \"fig\"], \"953\": [\"n07753275\", \"pineapple\"], \"954\": [\"n07753592\", \"banana\"], \"955\": [\"n07754684\", \"jackfruit\"], \"956\": [\"n07760859\", \"custard_apple\"], \"957\": [\"n07768694\", \"pomegranate\"], \"958\": [\"n07802026\", \"hay\"], \"959\": [\"n07831146\", \"carbonara\"], \"960\": [\"n07836838\", \"chocolate_sauce\"], \"961\": [\"n07860988\", \"dough\"], 
\"962\": [\"n07871810\", \"meat_loaf\"], \"963\": [\"n07873807\", \"pizza\"], \"964\": [\"n07875152\", \"potpie\"], \"965\": [\"n07880968\", \"burrito\"], \"966\": [\"n07892512\", \"red_wine\"], \"967\": [\"n07920052\", \"espresso\"], \"968\": [\"n07930864\", \"cup\"], \"969\": [\"n07932039\", \"eggnog\"], \"970\": [\"n09193705\", \"alp\"], \"971\": [\"n09229709\", \"bubble\"], \"972\": [\"n09246464\", \"cliff\"], \"973\": [\"n09256479\", \"coral_reef\"], \"974\": [\"n09288635\", \"geyser\"], \"975\": [\"n09332890\", \"lakeside\"], \"976\": [\"n09399592\", \"promontory\"], \"977\": [\"n09421951\", \"sandbar\"], \"978\": [\"n09428293\", \"seashore\"], \"979\": [\"n09468604\", \"valley\"], \"980\": [\"n09472597\", \"volcano\"], \"981\": [\"n09835506\", \"ballplayer\"], \"982\": [\"n10148035\", \"groom\"], \"983\": [\"n10565667\", \"scuba_diver\"], \"984\": [\"n11879895\", \"rapeseed\"], \"985\": [\"n11939491\", \"daisy\"], \"986\": [\"n12057211\", \"yellow_lady's_slipper\"], \"987\": [\"n12144580\", \"corn\"], \"988\": [\"n12267677\", \"acorn\"], \"989\": [\"n12620546\", \"hip\"], \"990\": [\"n12768682\", \"buckeye\"], \"991\": [\"n12985857\", \"coral_fungus\"], \"992\": [\"n12998815\", \"agaric\"], \"993\": [\"n13037406\", \"gyromitra\"], \"994\": [\"n13040303\", \"stinkhorn\"], \"995\": [\"n13044778\", \"earthstar\"], \"996\": [\"n13052670\", \"hen-of-the-woods\"], \"997\": [\"n13054560\", \"bolete\"], \"998\": [\"n13133613\", \"ear\"], \"999\": [\"n15075141\", \"toilet_tissue\"]}"
  },
  {
    "path": "pytorch_classification/mini_imagenet/model.py",
    "content": "from typing import List, Callable\n\nimport torch\nfrom torch import Tensor\nimport torch.nn as nn\n\n\ndef channel_shuffle(x: Tensor, groups: int) -> Tensor:\n\n    batch_size, num_channels, height, width = x.size()\n    channels_per_group = num_channels // groups\n\n    # reshape\n    # [batch_size, num_channels, height, width] -> [batch_size, groups, channels_per_group, height, width]\n    x = x.view(batch_size, groups, channels_per_group, height, width)\n\n    x = torch.transpose(x, 1, 2).contiguous()\n\n    # flatten\n    x = x.view(batch_size, -1, height, width)\n\n    return x\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, input_c: int, output_c: int, stride: int):\n        super(InvertedResidual, self).__init__()\n\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n        self.stride = stride\n\n        assert output_c % 2 == 0\n        branch_features = output_c // 2\n        # 当stride为1时，input_channel应该是branch_features的两倍\n        # python中 '<<' 是位运算，可理解为计算×2的快速方法\n        assert (self.stride != 1) or (input_c == branch_features << 1)\n\n        if self.stride == 2:\n            self.branch1 = nn.Sequential(\n                self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1),\n                nn.BatchNorm2d(input_c),\n                nn.Conv2d(input_c, branch_features, kernel_size=1, stride=1, padding=0, bias=False),\n                nn.BatchNorm2d(branch_features),\n                nn.ReLU(inplace=True)\n            )\n        else:\n            self.branch1 = nn.Sequential()\n\n        self.branch2 = nn.Sequential(\n            nn.Conv2d(input_c if self.stride > 1 else branch_features, branch_features, kernel_size=1,\n                      stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True),\n            self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, 
padding=1),\n            nn.BatchNorm2d(branch_features),\n            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True)\n        )\n\n    @staticmethod\n    def depthwise_conv(input_c: int,\n                       output_c: int,\n                       kernel_s: int,\n                       stride: int = 1,\n                       padding: int = 0,\n                       bias: bool = False) -> nn.Conv2d:\n        return nn.Conv2d(in_channels=input_c, out_channels=output_c, kernel_size=kernel_s,\n                         stride=stride, padding=padding, bias=bias, groups=input_c)\n\n    def forward(self, x: Tensor) -> Tensor:\n        if self.stride == 1:\n            x1, x2 = x.chunk(2, dim=1)\n            out = torch.cat((x1, self.branch2(x2)), dim=1)\n        else:\n            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)\n\n        out = channel_shuffle(out, 2)\n\n        return out\n\n\nclass ShuffleNetV2(nn.Module):\n    def __init__(self,\n                 stages_repeats: List[int],\n                 stages_out_channels: List[int],\n                 num_classes: int = 1000,\n                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):\n        super(ShuffleNetV2, self).__init__()\n\n        if len(stages_repeats) != 3:\n            raise ValueError(\"expected stages_repeats as list of 3 positive ints\")\n        if len(stages_out_channels) != 5:\n            raise ValueError(\"expected stages_out_channels as list of 5 positive ints\")\n        self._stage_out_channels = stages_out_channels\n\n        # input RGB image\n        input_channels = 3\n        output_channels = self._stage_out_channels[0]\n\n        self.conv1 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, kernel_size=3, stride=2, padding=1, bias=False),\n            nn.BatchNorm2d(output_channels),\n            
nn.ReLU(inplace=True)\n        )\n        input_channels = output_channels\n\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n        # Static annotations for mypy\n        self.stage2: nn.Sequential\n        self.stage3: nn.Sequential\n        self.stage4: nn.Sequential\n\n        stage_names = [\"stage{}\".format(i) for i in [2, 3, 4]]\n        for name, repeats, output_channels in zip(stage_names, stages_repeats,\n                                                  self._stage_out_channels[1:]):\n            seq = [inverted_residual(input_channels, output_channels, 2)]\n            for i in range(repeats - 1):\n                seq.append(inverted_residual(output_channels, output_channels, 1))\n            setattr(self, name, nn.Sequential(*seq))\n            input_channels = output_channels\n\n        output_channels = self._stage_out_channels[-1]\n        self.conv5 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=1, padding=0, bias=False),\n            nn.BatchNorm2d(output_channels),\n            nn.ReLU(inplace=True)\n        )\n\n        self.fc = nn.Linear(output_channels, num_classes)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        # See note [TorchScript super()]\n        x = self.conv1(x)\n        x = self.maxpool(x)\n        x = self.stage2(x)\n        x = self.stage3(x)\n        x = self.stage4(x)\n        x = self.conv5(x)\n        x = x.mean([2, 3])  # global pool\n        x = self.fc(x)\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef shufflenet_v2_x1_0(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 1.0x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth\n\n    :param num_classes:\n    :return:\n    
\"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 116, 232, 464, 1024],\n                         num_classes=num_classes)\n\n    return model\n\n\ndef shufflenet_v2_x0_5(num_classes=1000):\n    \"\"\"\n    Constructs a ShuffleNetV2 with 0.5x output channels, as described in\n    `\"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design\"\n    <https://arxiv.org/abs/1807.11164>`.\n    weight: https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth\n\n    :param num_classes:\n    :return:\n    \"\"\"\n    model = ShuffleNetV2(stages_repeats=[4, 8, 4],\n                         stages_out_channels=[24, 48, 96, 192, 1024],\n                         num_classes=num_classes)\n\n    return model\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/multi_train_utils/__init__.py",
    "content": "from .train_eval_utils import train_one_epoch, evaluate\nfrom .distributed_utils import init_distributed_mode, dist, cleanup\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/multi_train_utils/distributed_utils.py",
    "content": "import os\n\nimport torch\nimport torch.distributed as dist\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'  # 通信后端，nvidia GPU推荐使用NCCL\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                            world_size=args.world_size, rank=args.rank)\n    dist.barrier()\n\n\ndef cleanup():\n    dist.destroy_process_group()\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef reduce_value(value, average=True):\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return value\n\n    with torch.no_grad():\n        dist.all_reduce(value)\n        if average:\n            value /= world_size\n\n        return value\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 
当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/multi_train_utils/train_eval_utils.py",
    "content": "import sys\n\nfrom tqdm import tqdm\nimport torch\n\nfrom .distributed_utils import reduce_value, is_main_process, warmup_lr_scheduler\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, use_amp=False, warmup=True):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    # 在进程0中打印训练进度\n    if is_main_process():\n        data_loader = tqdm(data_loader, file=sys.stdout)\n\n    enable_amp = use_amp and \"cuda\" in device.type\n    scaler = torch.cuda.amp.GradScaler(enabled=enable_amp)\n\n    sample_num = 0\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        with torch.cuda.amp.autocast(enabled=enable_amp):\n            pred = model(images.to(device))\n            loss = loss_function(pred, labels.to(device))\n\n            pred_classes = torch.max(pred, dim=1)[1]\n            accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        scaler.scale(loss).backward()\n        scaler.step(optimizer)\n        scaler.update()\n        optimizer.zero_grad()\n\n        loss = reduce_value(loss, average=True)\n        accu_loss += loss.detach()\n\n        # 在进程0中打印平均loss\n        if is_main_process():\n            info = \"[epoch {}] loss: {:.3f}, train_acc: {:.3f}, lr: {:.5f}\".format(\n                epoch,\n                accu_loss.item() / (step + 1),\n                accu_num.item() / sample_num,\n                optimizer.param_groups[0][\"lr\"])\n            data_loader.desc = info\n\n        if not 
torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        if lr_scheduler is not None:  # 如果使用warmup训练，逐渐调整学习率\n            lr_scheduler.step()\n\n    # 等待所有进程计算完毕\n    if device != torch.device(\"cpu\"):\n        torch.cuda.synchronize(device)\n\n    return accu_loss.item() / (step + 1)\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 验证集样本个数\n    num_samples = len(data_loader.dataset)\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    # 在进程0中打印验证进度\n    if is_main_process():\n        data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    # 等待所有进程计算完毕\n    if device != torch.device(\"cpu\"):\n        torch.cuda.synchronize(device)\n\n    sum_num = reduce_value(sum_num, average=False)\n    acc = sum_num.item() / num_samples\n\n    return acc\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/my_dataset.py",
    "content": "import os\nimport json\nfrom PIL import Image\nimport pandas as pd\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self,\n                 root_dir: str,\n                 csv_name: str,\n                 json_path: str,\n                 transform=None):\n        images_dir = os.path.join(root_dir, \"images\")\n        assert os.path.exists(images_dir), \"dir:'{}' not found.\".format(images_dir)\n\n        assert os.path.exists(json_path), \"file:'{}' not found.\".format(json_path)\n        self.label_dict = json.load(open(json_path, \"r\"))\n\n        csv_path = os.path.join(root_dir, csv_name)\n        assert os.path.exists(csv_path), \"file:'{}' not found.\".format(csv_path)\n        csv_data = pd.read_csv(csv_path)\n        self.total_num = csv_data.shape[0]\n        self.img_paths = [os.path.join(images_dir, i)for i in csv_data[\"filename\"].values]\n        self.img_label = [self.label_dict[i][0] for i in csv_data[\"label\"].values]\n        self.labels = set(csv_data[\"label\"].values)\n\n        self.transform = transform\n\n    def __len__(self):\n        return self.total_num\n\n    def __getitem__(self, item):\n        img = Image.open(self.img_paths[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.img_paths[item]))\n        label = self.img_label[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/restructure_csv.py",
    "content": "import os\nimport json\n\nimport pandas as pd\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\n\ndef read_csv_classes(csv_dir: str, csv_name: str):\n    data = pd.read_csv(os.path.join(csv_dir, csv_name))\n    # print(data.head(1))  # filename, label\n\n    label_set = set(data[\"label\"].drop_duplicates().values)\n\n    print(\"{} have {} images and {} classes.\".format(csv_name,\n                                                     data.shape[0],\n                                                     len(label_set)))\n    return data, label_set\n\n\ndef calculate_split_info(path: str, label_dict: dict, rate: float = 0.2):\n    # read all images\n    image_dir = os.path.join(path, \"images\")\n    images_list = [i for i in os.listdir(image_dir) if i.endswith(\".jpg\")]\n    print(\"find {} images in dataset.\".format(len(images_list)))\n\n    train_data, train_label = read_csv_classes(path, \"train.csv\")\n    val_data, val_label = read_csv_classes(path, \"val.csv\")\n    test_data, test_label = read_csv_classes(path, \"test.csv\")\n\n    # Union operation\n    labels = (train_label | val_label | test_label)\n    labels = list(labels)\n    labels.sort()\n    print(\"all classes: {}\".format(len(labels)))\n\n    # create classes_name.json\n    classes_label = dict([(label, [index, label_dict[label]]) for index, label in enumerate(labels)])\n    json_str = json.dumps(classes_label, indent=4)\n    with open('classes_name.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # concat csv data\n    data = pd.concat([train_data, val_data, test_data], axis=0)\n    print(\"total data shape: {}\".format(data.shape))\n\n    # split data on every classes\n    num_every_classes = []\n    split_train_data = []\n    split_val_data = []\n    for label in labels:\n        class_data = data[data[\"label\"] == label]\n        num_every_classes.append(class_data.shape[0])\n\n        # shuffle\n        shuffle_data = class_data.sample(frac=1, 
random_state=1)\n        num_train_sample = int(class_data.shape[0] * (1 - rate))\n        split_train_data.append(shuffle_data[:num_train_sample])\n        split_val_data.append(shuffle_data[num_train_sample:])\n\n        # imshow\n        imshow_flag = False\n        if imshow_flag:\n            img_name, img_label = shuffle_data.iloc[0].values\n            img = Image.open(os.path.join(image_dir, img_name))\n            plt.imshow(img)\n            plt.title(\"class: \" + classes_label[img_label][1])\n            plt.show()\n\n    # plot classes distribution\n    plot_flag = False\n    if plot_flag:\n        plt.bar(range(1, 101), num_every_classes, align='center')\n        plt.show()\n\n    # concatenate data\n    new_train_data = pd.concat(split_train_data, axis=0)\n    new_val_data = pd.concat(split_val_data, axis=0)\n\n    # save new csv data\n    new_train_data.to_csv(os.path.join(path, \"new_train.csv\"))\n    new_val_data.to_csv(os.path.join(path, \"new_val.csv\"))\n\n\ndef main():\n    data_dir = \"/data/mini-imagenet/\"\n    json_path = \"./imagenet_class_index.json\"\n\n    # load imagenet labels\n    label_dict = json.load(open(json_path, \"r\"))\n    label_dict = dict([(v[0], v[1]) for k, v in label_dict.items()])\n\n    calculate_split_info(data_dir, label_dict)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/train_multi_gpu_using_launch.py",
    "content": "import os\nimport math\nimport tempfile\nimport argparse\n\nimport torch\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom model import shufflenet_v2_x1_0\nfrom my_dataset import MyDataSet\nfrom multi_train_utils import train_one_epoch, evaluate, init_distributed_mode, dist, cleanup\n\n\ndef main(args):\n    if torch.cuda.is_available() is False:\n        raise EnvironmentError(\"not find GPU device for training.\")\n\n    # 初始化各进程环境\n    init_distributed_mode(args=args)\n\n    rank = args.rank\n    device = torch.device(args.device)\n    batch_size = args.batch_size\n    num_classes = args.num_classes\n    weights_path = args.weights\n    args.lr *= args.world_size  # 学习率要根据并行GPU的数量进行倍增\n\n    if rank == 0:  # 在第一个进程中打印信息，并实例化tensorboard\n        print(args)\n        print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n        tb_writer = SummaryWriter()\n        if os.path.exists(\"./weights\") is False:\n            os.makedirs(\"./weights\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    data_root = args.data_path\n    json_path = \"./classes_name.json\"\n    # 实例化训练数据集\n    train_dataset = MyDataSet(root_dir=data_root,\n                              csv_name=\"new_train.csv\",\n                       
       json_path=json_path,\n                              transform=data_transform[\"train\"])\n\n    # check num_classes\n    if args.num_classes != len(train_dataset.labels):\n        raise ValueError(\"dataset have {} classes, but input {}\".format(len(train_dataset.labels),\n                                                                        args.num_classes))\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(root_dir=data_root,\n                            csv_name=\"new_val.csv\",\n                            json_path=json_path,\n                            transform=data_transform[\"val\"])\n\n    # 给每个rank对应的进程分配训练的样本索引\n    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n\n    # 将样本索引每batch_size个元素组成一个list\n    train_batch_sampler = torch.utils.data.BatchSampler(\n        train_sampler, batch_size, drop_last=True)\n\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    if rank == 0:\n        print('Using {} dataloader workers every process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_sampler=train_batch_sampler,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             sampler=val_sampler,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n    # 实例化模型\n    model = shufflenet_v2_x1_0(num_classes=num_classes).to(device)\n\n    # 如果存在预训练权重则载入\n    
if os.path.exists(weights_path):\n        weights_dict = torch.load(weights_path, map_location=device)\n        load_weights_dict = {k: v for k, v in weights_dict.items()\n                             if model.state_dict()[k].numel() == v.numel()}\n        model.load_state_dict(load_weights_dict, strict=False)\n    else:\n        checkpoint_path = os.path.join(tempfile.gettempdir(), \"initial_weights.pt\")\n        # 如果不存在预训练权重，需要将第一个进程中的权重保存，然后其他进程载入，保持初始化权重一致\n        if rank == 0:\n            torch.save(model.state_dict(), checkpoint_path)\n\n        dist.barrier()\n        # 这里注意，一定要指定map_location参数，否则会导致第一块GPU占用更多资源\n        model.load_state_dict(torch.load(checkpoint_path, map_location=device))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                para.requires_grad_(False)\n    else:\n        # 只有训练带有BN结构的网络时使用SyncBatchNorm采用意义\n        if args.syncBN:\n            # 使用SyncBatchNorm后训练会更耗时\n            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)\n\n    # 转为DDP模型\n    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n\n    # optimizer\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        train_sampler.set_epoch(epoch)\n\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n      
  acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        if rank == 0:\n            print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n            tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n            tb_writer.add_scalar(tags[0], mean_loss, epoch)\n            tb_writer.add_scalar(tags[1], acc, epoch)\n            tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n            torch.save(model.module.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n    # 删除临时缓存文件\n    if rank == 0:\n        if os.path.exists(checkpoint_path) is True:\n            os.remove(checkpoint_path)\n\n    cleanup()\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=100)\n    parser.add_argument('--epochs', type=int, default=100)\n    parser.add_argument('--batch-size', type=int, default=32)\n    parser.add_argument('--lr', type=float, default=0.01)\n    parser.add_argument('--lrf', type=float, default=0.0001)\n    # 是否启用SyncBatchNorm\n    parser.add_argument('--syncBN', type=bool, default=True)\n\n    # 数据集所在根目录\n    parser.add_argument('--data-path', type=str,\n                        default=\"/home/wz/mini-imagenet/\")\n\n    parser.add_argument('--weights', type=str, default='',\n                        help='initial weights path')\n\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n\n    # 不要改该参数，系统会自动分配\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    # 开启的进程数(注意不是线程),不用设置该参数，会根据nproc_per_node自动设置\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/mini_imagenet/train_single_gpu.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import shufflenet_v2_x1_0\nfrom my_dataset import MyDataSet\nfrom multi_train_utils import train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    data_root = args.data_path\n    json_path = \"./classes_name.json\"\n    # 实例化训练数据集\n    train_dataset = MyDataSet(root_dir=data_root,\n                              csv_name=\"new_train.csv\",\n                              json_path=json_path,\n                              transform=data_transform[\"train\"])\n\n    # check num_classes\n    if args.num_classes != len(train_dataset.labels):\n        raise ValueError(\"dataset have {} classes, but input {}\".format(len(train_dataset.labels),\n                                                                        args.num_classes))\n\n    # 实例化验证数据集\n    val_dataset = 
MyDataSet(root_dir=data_root,\n                            csv_name=\"new_val.csv\",\n                            json_path=json_path,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    # create model\n    model = shufflenet_v2_x1_0(num_classes=args.num_classes).to(device)\n\n    # 如果存在预训练权重则载入\n    # if args.weights != \"\":\n    #     if os.path.exists(args.weights):\n    #         weights_dict = torch.load(args.weights, map_location=device)\n    #         load_weights_dict = {k: v for k, v in weights_dict.items()\n    #                              if model.state_dict()[k].numel() == v.numel()}\n    #         print(model.load_state_dict(load_weights_dict, strict=False))\n    #     else:\n    #         raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    # if args.freeze_layers:\n    #     for name, para in model.named_parameters():\n    #         # 除最后的全连接层外，其他权重全部冻结\n    #         if \"fc\" not in name:\n    #     
        para.requires_grad_(False)\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=4E-5)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch,\n                                    warmup=True)\n\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=100)\n    parser.add_argument('--epochs', type=int, default=100)\n    parser.add_argument('--batch-size', type=int, default=32)\n    parser.add_argument('--lr', type=float, default=0.1)\n    parser.add_argument('--lrf', type=float, default=0.0001)\n\n    # 数据集所在根目录\n    parser.add_argument('--data-path', type=str, default=\"/home/wz/mini-imagenet/\")\n\n    parser.add_argument('--weights', type=str, default='',\n                        help='initial weights path')\n    
parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/model_complexity/main.py",
    "content": "import torch\nfrom fvcore.nn import FlopCountAnalysis, parameter_count_table\nfrom prettytable import PrettyTable\nfrom model import efficientnetv2_s\n\n\ndef main():\n    model = efficientnetv2_s()\n\n    # option1\n    for name, para in model.named_parameters():\n        # 除head外，其他权重全部冻结\n        if \"head\" not in name:\n            para.requires_grad_(False)\n        else:\n            print(\"training {}\".format(name))\n\n    complexity = model.complexity(224, 224, 3)\n    table = PrettyTable()\n    table.field_names = [\"params\", \"freeze-params\", \"train-params\", \"FLOPs\", \"acts\"]\n    table.add_row([complexity[\"params\"],\n                   complexity[\"freeze\"],\n                   complexity[\"params\"] - complexity[\"freeze\"],\n                   complexity[\"flops\"],\n                   complexity[\"acts\"]])\n    print(table)\n\n    # option2\n    tensor = (torch.rand(1, 3, 224, 224),)\n    flops = FlopCountAnalysis(model, tensor)\n    print(flops.total())\n\n    print(parameter_count_table(model))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/model_complexity/model.py",
    "content": "from collections import OrderedDict\nfrom functools import partial\nfrom typing import Callable, Optional\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\n\nfrom utils import *\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n\n    This function is taken from the rwightman.\n    It can be seen here:\n    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py#L140\n    \"\"\"\n    if drop_prob == 0. or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"Deep Networks with Stochastic Depth\", https://arxiv.org/pdf/1603.09382.pdf\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass ConvBNAct(nn.Module):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None):\n        super(ConvBNAct, self).__init__()\n\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            
norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.SiLU  # alias Swish  (torch>=1.7)\n\n        self.conv = nn.Conv2d(in_channels=in_planes,\n                              out_channels=out_planes,\n                              kernel_size=kernel_size,\n                              stride=stride,\n                              padding=padding,\n                              groups=groups,\n                              bias=False)\n\n        self.bn = norm_layer(out_planes)\n        self.act = activation_layer()\n\n    def forward(self, x):\n        result = self.conv(x)\n        result = self.bn(result)\n        result = self.act(result)\n\n        return result\n\n    def complexity(self, cx):\n        cx = conv2d_cx(cx,\n                       in_c=self.conv.in_channels,\n                       out_c=self.conv.out_channels,\n                       k=self.conv.kernel_size[0],  # tuple type\n                       stride=self.conv.stride[0],  # tuple type\n                       groups=self.conv.groups,\n                       bias=False,\n                       trainable=self.conv.weight.requires_grad)\n        cx = norm2d_cx(cx, self.conv.out_channels, trainable=self.bn.weight.requires_grad)\n\n        return cx\n\n\nclass SqueezeExcite(nn.Module):\n    def __init__(self,\n                 input_c: int,   # block input channel\n                 expand_c: int,  # block expand channel\n                 se_ratio: float = 0.25):\n        super(SqueezeExcite, self).__init__()\n        squeeze_c = int(input_c * se_ratio)\n        self.conv_reduce = nn.Conv2d(expand_c, squeeze_c, 1)\n        self.act1 = nn.SiLU()  # alias Swish\n        self.conv_expand = nn.Conv2d(squeeze_c, expand_c, 1)\n        self.act2 = nn.Sigmoid()\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = x.mean((2, 3), keepdim=True)\n        scale = self.conv_reduce(scale)\n        scale = self.act1(scale)\n        scale = 
self.conv_expand(scale)\n        scale = self.act2(scale)\n        return scale * x\n\n    def complexity(self, cx):\n        h, w = cx[\"h\"], cx[\"w\"]\n        cx = gap2d_cx(cx)\n        cx = conv2d_cx(cx,\n                       in_c=self.conv_reduce.in_channels,\n                       out_c=self.conv_reduce.out_channels,\n                       k=1,\n                       bias=True,\n                       trainable=self.conv_reduce.weight.requires_grad)\n        cx = conv2d_cx(cx,\n                       in_c=self.conv_expand.in_channels,\n                       out_c=self.conv_expand.out_channels,\n                       k=1,\n                       bias=True,\n                       trainable=self.conv_expand.weight.requires_grad)\n        cx[\"h\"], cx[\"w\"] = h, w\n\n        return cx\n\n\nclass MBConv(nn.Module):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float,\n                 drop_rate: float,\n                 norm_layer: Callable[..., nn.Module]):\n        super(MBConv, self).__init__()\n\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.has_shortcut = (stride == 1 and input_c == out_c)\n\n        activation_layer = nn.SiLU  # alias Swish\n        expanded_c = input_c * expand_ratio\n\n        # 在EfficientNetV2中，MBConv中不存在expansion=1的情况所以conv_pw肯定存在\n        assert expand_ratio != 1\n        # Point-wise expansion\n        self.expand_conv = ConvBNAct(input_c,\n                                     expanded_c,\n                                     kernel_size=1,\n                                     norm_layer=norm_layer,\n                                     activation_layer=activation_layer)\n\n        # Depth-wise convolution\n        self.dwconv = ConvBNAct(expanded_c,\n                                expanded_c,\n 
                               kernel_size=kernel_size,\n                                stride=stride,\n                                groups=expanded_c,\n                                norm_layer=norm_layer,\n                                activation_layer=activation_layer)\n\n        self.se = SqueezeExcite(input_c, expanded_c, se_ratio) if se_ratio > 0 else nn.Identity()\n\n        # Point-wise linear projection\n        self.project_conv = ConvBNAct(expanded_c,\n                                      out_planes=out_c,\n                                      kernel_size=1,\n                                      norm_layer=norm_layer,\n                                      activation_layer=nn.Identity)  # 注意这里没有激活函数，所有传入Identity\n\n        self.out_channels = out_c\n\n        # 只有在使用shortcut连接时才使用dropout层\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            self.dropout = DropPath(drop_rate)\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.expand_conv(x)\n        result = self.dwconv(result)\n        result = self.se(result)\n        result = self.project_conv(result)\n\n        if self.has_shortcut:\n            if self.drop_rate > 0:\n                result = self.dropout(result)\n            result += x\n\n        return result\n\n    def complexity(self, cx):\n        cx = self.expand_conv.complexity(cx)\n        cx = self.dwconv.complexity(cx)\n        cx = self.se.complexity(cx)\n        cx = self.project_conv.complexity(cx)\n\n        return cx\n\n\nclass FusedMBConv(nn.Module):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float,\n                 drop_rate: float,\n                 norm_layer: Callable[..., nn.Module]):\n        super(FusedMBConv, self).__init__()\n\n        assert stride in [1, 2]\n        assert 
se_ratio == 0\n\n        self.has_shortcut = stride == 1 and input_c == out_c\n        self.drop_rate = drop_rate\n\n        self.has_expansion = expand_ratio != 1\n\n        activation_layer = nn.SiLU  # alias Swish\n        expanded_c = input_c * expand_ratio\n\n        # 只有当expand ratio不等于1时才有expand conv\n        if self.has_expansion:\n            # Expansion convolution\n            self.expand_conv = ConvBNAct(input_c,\n                                         expanded_c,\n                                         kernel_size=kernel_size,\n                                         stride=stride,\n                                         norm_layer=norm_layer,\n                                         activation_layer=activation_layer)\n\n            self.project_conv = ConvBNAct(expanded_c,\n                                          out_c,\n                                          kernel_size=1,\n                                          norm_layer=norm_layer,\n                                          activation_layer=nn.Identity)  # 注意没有激活函数\n        else:\n            # 当只有project_conv时的情况\n            self.project_conv = ConvBNAct(input_c,\n                                          out_c,\n                                          kernel_size=kernel_size,\n                                          stride=stride,\n                                          norm_layer=norm_layer,\n                                          activation_layer=activation_layer)  # 注意有激活函数\n\n        self.out_channels = out_c\n\n        # 只有在使用shortcut连接时才使用dropout层\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            self.dropout = DropPath(drop_rate)\n\n    def forward(self, x: Tensor) -> Tensor:\n        if self.has_expansion:\n            result = self.expand_conv(x)\n            result = self.project_conv(result)\n        else:\n            result = self.project_conv(x)\n\n        if self.has_shortcut:\n            if self.drop_rate 
> 0:\n                result = self.dropout(result)\n\n            result += x\n\n        return result\n\n    def complexity(self, cx):\n        if self.has_expansion:\n            cx = self.expand_conv.complexity(cx)\n            cx = self.project_conv.complexity(cx)\n        else:\n            cx = self.project_conv.complexity(cx)\n\n        return cx\n\n\nclass EfficientNetV2(nn.Module):\n    def __init__(self,\n                 model_cnf: list,\n                 num_classes: int = 1000,\n                 num_features: int = 1280,\n                 dropout_rate: float = 0.2,\n                 drop_connect_rate: float = 0.2):\n        super(EfficientNetV2, self).__init__()\n\n        for cnf in model_cnf:\n            assert len(cnf) == 8\n        self.model_cnf = model_cnf\n        self.num_classes = num_classes\n        self.num_features = num_features\n\n        norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.1)\n\n        stem_filter_num = model_cnf[0][4]\n\n        self.stem = ConvBNAct(3,\n                              stem_filter_num,\n                              kernel_size=3,\n                              stride=2,\n                              norm_layer=norm_layer)  # 激活函数默认是SiLU\n\n        total_blocks = sum([i[0] for i in model_cnf])\n        block_id = 0\n        blocks = []\n        for cnf in model_cnf:\n            repeats = cnf[0]\n            op = FusedMBConv if cnf[-2] == 0 else MBConv\n            for i in range(repeats):\n                blocks.append(op(kernel_size=cnf[1],\n                                 input_c=cnf[4] if i == 0 else cnf[5],\n                                 out_c=cnf[5],\n                                 expand_ratio=cnf[3],\n                                 stride=cnf[2] if i == 0 else 1,\n                                 se_ratio=cnf[-1],\n                                 drop_rate=drop_connect_rate * block_id / total_blocks,\n                                 norm_layer=norm_layer))\n                
block_id += 1\n        self.blocks = nn.Sequential(*blocks)\n\n        head_input_c = model_cnf[-1][-3]\n        head = OrderedDict()\n\n        head.update({\"project_conv\": ConvBNAct(head_input_c,\n                                               num_features,\n                                               kernel_size=1,\n                                               norm_layer=norm_layer)})  # 激活函数默认是SiLU\n\n        head.update({\"avgpool\": nn.AdaptiveAvgPool2d(1)})\n        head.update({\"flatten\": nn.Flatten()})\n\n        if dropout_rate > 0:\n            head.update({\"dropout\": nn.Dropout(p=dropout_rate, inplace=True)})\n        head.update({\"classifier\": nn.Linear(num_features, num_classes)})\n\n        self.head = nn.Sequential(head)\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def forward(self, x: Tensor) -> Tensor:\n        x = self.stem(x)\n        x = self.blocks(x)\n        x = self.head(x)\n\n        return x\n\n    def complexity(self, h, w, c):\n        cx = {\"h\": h, \"w\": w, \"c\": c, \"flops\": 0, \"params\": 0, \"acts\": 0, \"freeze\": 0}\n        cx = self.stem.complexity(cx)\n\n        for module in self.blocks.children():\n            if hasattr(module, \"complexity\"):\n                cx = module.complexity(cx)\n            else:\n                print(module)\n\n        for module in self.head.children():\n            if hasattr(module, \"complexity\"):\n                cx = module.complexity(cx)\n            elif isinstance(module, nn.Linear):\n        
        in_units = module.in_features\n                out_units = module.out_features\n                cx = gap2d_cx(cx)\n                cx = linear_cx(cx, in_units, out_units, bias=True, trainable=module.weight.requires_grad)\n        # print(cx)\n        return cx\n\n\ndef efficientnetv2_s(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 300, eval_size: 384\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[2, 3, 1, 1, 24, 24, 0, 0],\n                    [4, 3, 2, 4, 24, 48, 0, 0],\n                    [4, 3, 2, 4, 48, 64, 0, 0],\n                    [6, 3, 2, 4, 64, 128, 1, 0.25],\n                    [9, 3, 1, 6, 128, 160, 1, 0.25],\n                    [15, 3, 2, 6, 160, 256, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.2)\n    return model\n\n\ndef efficientnetv2_m(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[3, 3, 1, 1, 24, 24, 0, 0],\n                    [5, 3, 2, 4, 24, 48, 0, 0],\n                    [5, 3, 2, 4, 48, 80, 0, 0],\n                    [7, 3, 2, 4, 80, 160, 1, 0.25],\n                    [14, 3, 1, 6, 160, 176, 1, 0.25],\n                    [18, 3, 2, 6, 176, 304, 1, 0.25],\n                    [5, 3, 1, 6, 304, 512, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.3)\n    return model\n\n\ndef efficientnetv2_l(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, 
in_c, out_c, operator, se_ratio\n    model_config = [[4, 3, 1, 1, 32, 32, 0, 0],\n                    [7, 3, 2, 4, 32, 64, 0, 0],\n                    [7, 3, 2, 4, 64, 96, 0, 0],\n                    [10, 3, 2, 4, 96, 192, 1, 0.25],\n                    [19, 3, 1, 6, 192, 224, 1, 0.25],\n                    [25, 3, 2, 6, 224, 384, 1, 0.25],\n                    [7, 3, 1, 6, 384, 640, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.4)\n    return model\n"
  },
  {
    "path": "pytorch_classification/model_complexity/utils.py",
    "content": "\"\"\"\nthese code refers to:\nhttps://github.com/facebookresearch/pycls/blob/master/pycls/models/blocks.py\n\"\"\"\n\n\ndef conv2d_cx(cx, in_c, out_c, k, *, stride=1, groups=1, bias=False, trainable=True):\n    \"\"\"Accumulates complexity of conv2d into cx = (h, w, flops, params, acts).\"\"\"\n    assert k % 2 == 1, \"Only odd size kernels supported to avoid padding issues.\"\n    h, w, c = cx[\"h\"], cx[\"w\"], cx[\"c\"]\n    assert c == in_c\n    h, w = (h - 1) // stride + 1, (w - 1) // stride + 1\n    cx[\"h\"] = h\n    cx[\"w\"] = w\n    cx[\"c\"] = out_c\n    cx[\"flops\"] += k * k * in_c * out_c * h * w // groups + (out_c if bias else 0)\n    cx[\"params\"] += k * k * in_c * out_c // groups + (out_c if bias else 0)\n    cx[\"acts\"] += out_c * h * w\n    if trainable is False:\n        cx[\"freeze\"] += k * k * in_c * out_c // groups + (out_c if bias else 0)\n    return cx\n\n\ndef pool2d_cx(cx, in_c, k, *, stride=1):\n    \"\"\"Accumulates complexity of pool2d into cx = (h, w, flops, params, acts).\"\"\"\n    assert k % 2 == 1, \"Only odd size kernels supported to avoid padding issues.\"\n    h, w, c = cx[\"h\"], cx[\"w\"], cx[\"c\"]\n    assert c == in_c\n    h, w = (h - 1) // stride + 1, (w - 1) // stride + 1\n    cx[\"h\"] = h\n    cx[\"w\"] = w\n    cx[\"acts\"] += in_c * h * w\n    return cx\n\n\ndef norm2d_cx(cx, in_c, trainable=True):\n    \"\"\"Accumulates complexity of norm2d into cx = (h, w, flops, params, acts).\"\"\"\n    c, params = cx[\"c\"], cx[\"params\"]\n    assert c == in_c\n    cx[\"params\"] += 4 * c\n    cx[\"freeze\"] += 2 * c  # moving_mean, variance\n    if trainable is False:\n        cx[\"freeze\"] += 2 * c  # beta, gamma\n    return cx\n\n\ndef gap2d_cx(cx):\n    \"\"\"Accumulates complexity of gap2d into cx = (h, w, flops, params, acts).\"\"\"\n    cx[\"h\"] = 1\n    cx[\"w\"] = 1\n    return cx\n\n\ndef linear_cx(cx, in_units, out_units, *, bias=False, trainable=True):\n    \"\"\"Accumulates complexity of 
linear into cx = (h, w, flops, params, acts).\"\"\"\n    c = cx[\"c\"]\n    assert c == in_units\n    cx[\"c\"] = out_units\n    cx[\"flops\"] += in_units * out_units + (out_units if bias else 0)\n    cx[\"params\"] += in_units * out_units + (out_units if bias else 0)\n    cx[\"acts\"] += out_units\n    if trainable is False:\n        cx[\"freeze\"] += in_units * out_units + (out_units if bias else 0)\n    return cx\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/create_confusion_matrix.py",
    "content": "import os\nimport json\nimport argparse\nimport sys\n\nimport torch\nfrom torchvision import transforms\nimport numpy as np\nfrom tqdm import tqdm\nimport matplotlib.pyplot as plt\nfrom prettytable import PrettyTable\n\nfrom utils import read_split_data\nfrom my_dataset import MyDataSet\nfrom model import swin_base_patch4_window12_384_in22k as create_model\n\n\nclass ConfusionMatrix(object):\n    \"\"\"\n    注意，如果显示的图像不全，是matplotlib版本问题\n    本例程使用matplotlib-3.2.1(windows and ubuntu)绘制正常\n    需要额外安装prettytable库\n    \"\"\"\n    def __init__(self, num_classes: int, labels: list):\n        self.matrix = np.zeros((num_classes, num_classes))\n        self.num_classes = num_classes\n        self.labels = labels\n\n    def update(self, preds, labels):\n        for p, t in zip(preds, labels):\n            self.matrix[p, t] += 1\n\n    def summary(self):\n        # calculate accuracy\n        sum_TP = 0\n        for i in range(self.num_classes):\n            sum_TP += self.matrix[i, i]\n        acc = sum_TP / np.sum(self.matrix)\n        print(\"the model accuracy is \", acc)\n\n        # precision, recall, specificity\n        table = PrettyTable()\n        table.field_names = [\"\", \"Precision\", \"Recall\", \"Specificity\"]\n        for i in range(self.num_classes):\n            TP = self.matrix[i, i]\n            FP = np.sum(self.matrix[i, :]) - TP\n            FN = np.sum(self.matrix[:, i]) - TP\n            TN = np.sum(self.matrix) - TP - FP - FN\n            Precision = round(TP / (TP + FP), 3) if TP + FP != 0 else 0.\n            Recall = round(TP / (TP + FN), 3) if TP + FN != 0 else 0.\n            Specificity = round(TN / (TN + FP), 3) if TN + FP != 0 else 0.\n            table.add_row([self.labels[i], Precision, Recall, Specificity])\n        print(table)\n\n    def plot(self):\n        matrix = self.matrix\n        print(matrix)\n        plt.imshow(matrix, cmap=plt.cm.Blues)\n\n        # 设置x轴坐标label\n        plt.xticks(range(self.num_classes), 
self.labels, rotation=45)\n        # 设置y轴坐标label\n        plt.yticks(range(self.num_classes), self.labels)\n        # 显示colorbar\n        plt.colorbar()\n        plt.xlabel('True Labels')\n        plt.ylabel('Predicted Labels')\n        plt.title('Confusion matrix')\n\n        # 在图中标注数量/概率信息\n        thresh = matrix.max() / 2\n        for x in range(self.num_classes):\n            for y in range(self.num_classes):\n                # 注意这里的matrix[y, x]不是matrix[x, y]\n                info = int(matrix[y, x])\n                plt.text(x, y, info,\n                         verticalalignment='center',\n                         horizontalalignment='center',\n                         color=\"white\" if info > thresh else \"black\")\n        plt.tight_layout()\n        plt.show()\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(f\"using device: {device}\")\n\n    _, _, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = 384\n    data_transform = {\n        \"val\": transforms.Compose([transforms.Resize(int(img_size * 1.143)),\n                                   transforms.CenterCrop(img_size),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    nw = min([os.cpu_count(), args.batch_size if args.batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=args.batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n  
                                           num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes)\n    # load pretrain weights\n    assert os.path.exists(args.weights), \"cannot find {} file\".format(args.weights)\n    model.load_state_dict(torch.load(args.weights, map_location=device))\n    model.to(device)\n\n    # read class_indict\n    json_label_path = './class_indices.json'\n    assert os.path.exists(json_label_path), \"cannot find {} file\".format(json_label_path)\n    json_file = open(json_label_path, 'r')\n    class_indict = json.load(json_file)\n\n    labels = [label for _, label in class_indict.items()]\n    confusion = ConfusionMatrix(num_classes=args.num_classes, labels=labels)\n    model.eval()\n    with torch.no_grad():\n        for val_data in tqdm(val_loader, file=sys.stdout):\n            val_images, val_labels = val_data\n            outputs = model(val_images.to(device))\n            outputs = torch.softmax(outputs, dim=1)\n            outputs = torch.argmax(outputs, dim=1)\n            confusion.update(outputs.to(\"cpu\").numpy(), val_labels.to(\"cpu\").numpy())\n    confusion.plot()\n    confusion.summary()\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--batch-size', type=int, default=2)\n\n    # 数据集所在根目录\n    # http://download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 训练权重路径\n    parser.add_argument('--weights', type=str, default='./weights/model-19.pth',\n                        help='initial weights path')\n    # 运行设备 (device used for inference)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/model.py",
    "content": "\"\"\" Swin Transformer\nA PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`\n    - https://arxiv.org/pdf/2103.14030\n\nCode/weights from https://github.com/microsoft/Swin-Transformer\n\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as checkpoint\nimport numpy as np\nfrom typing import Optional\n\n\ndef drop_path_f(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n\n    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,\n    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...\n    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for\n    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use\n    'survival rate' as the argument.\n\n    \"\"\"\n    if drop_prob == 0. 
or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path_f(x, self.drop_prob, self.training)\n\n\ndef window_partition(x, window_size: int):\n    \"\"\"\n    将feature map按照window_size划分成一个个没有重叠的window\n    Args:\n        x: (B, H, W, C)\n        window_size (int): window size(M)\n\n    Returns:\n        windows: (num_windows*B, window_size, window_size, C)\n    \"\"\"\n    B, H, W, C = x.shape\n    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)\n    # permute: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H//Mh, W//Mw, Mh, Mw, C]\n    # view: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B*num_windows, Mh, Mw, C]\n    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)\n    return windows\n\n\ndef window_reverse(windows, window_size: int, H: int, W: int):\n    \"\"\"\n    将一个个window还原成一个feature map\n    Args:\n        windows: (num_windows*B, window_size, window_size, C)\n        window_size (int): Window size(M)\n        H (int): Height of image\n        W (int): Width of image\n\n    Returns:\n        x: (B, H, W, C)\n    \"\"\"\n    B = int(windows.shape[0] / (H * W / window_size / window_size))\n    # view: [B*num_windows, Mh, Mw, C] -> [B, H//Mh, W//Mw, Mh, Mw, C]\n    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)\n    # permute: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B, 
H//Mh, Mh, W//Mw, Mw, C]\n    # view: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H, W, C]\n    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)\n    return x\n\n\nclass PatchEmbed(nn.Module):\n    \"\"\"\n    2D Image to Patch Embedding\n    \"\"\"\n    def __init__(self, patch_size=4, in_c=3, embed_dim=96, norm_layer=None):\n        super().__init__()\n        patch_size = (patch_size, patch_size)\n        self.patch_size = patch_size\n        self.in_chans = in_c\n        self.embed_dim = embed_dim\n        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)\n        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()\n\n    def forward(self, x):\n        _, _, H, W = x.shape\n\n        # padding\n        # 如果输入图片的H，W不是patch_size的整数倍，需要进行padding\n        pad_input = (H % self.patch_size[0] != 0) or (W % self.patch_size[1] != 0)\n        if pad_input:\n            # to pad the last 3 dimensions,\n            # (W_left, W_right, H_top,H_bottom, C_front, C_back)\n            x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1],\n                          0, self.patch_size[0] - H % self.patch_size[0],\n                          0, 0))\n\n        # 下采样patch_size倍\n        x = self.proj(x)\n        _, _, H, W = x.shape\n        # flatten: [B, C, H, W] -> [B, C, HW]\n        # transpose: [B, C, HW] -> [B, HW, C]\n        x = x.flatten(2).transpose(1, 2)\n        x = self.norm(x)\n        return x, H, W\n\n\nclass PatchMerging(nn.Module):\n    r\"\"\" Patch Merging Layer.\n\n    Args:\n        dim (int): Number of input channels.\n        norm_layer (nn.Module, optional): Normalization layer.  
Default: nn.LayerNorm\n    \"\"\"\n\n    def __init__(self, dim, norm_layer=nn.LayerNorm):\n        super().__init__()\n        self.dim = dim\n        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)\n        self.norm = norm_layer(4 * dim)\n\n    def forward(self, x, H, W):\n        \"\"\"\n        x: B, H*W, C\n        \"\"\"\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        x = x.view(B, H, W, C)\n\n        # padding\n        # 如果输入feature map的H，W不是2的整数倍，需要进行padding\n        pad_input = (H % 2 == 1) or (W % 2 == 1)\n        if pad_input:\n            # to pad the last 3 dimensions, starting from the last dimension and moving forward.\n            # (C_front, C_back, W_left, W_right, H_top, H_bottom)\n            # 注意这里的Tensor通道是[B, H, W, C]，所以会和官方文档有些不同\n            x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2))\n\n        x0 = x[:, 0::2, 0::2, :]  # [B, H/2, W/2, C]\n        x1 = x[:, 1::2, 0::2, :]  # [B, H/2, W/2, C]\n        x2 = x[:, 0::2, 1::2, :]  # [B, H/2, W/2, C]\n        x3 = x[:, 1::2, 1::2, :]  # [B, H/2, W/2, C]\n        x = torch.cat([x0, x1, x2, x3], -1)  # [B, H/2, W/2, 4*C]\n        x = x.view(B, -1, 4 * C)  # [B, H/2*W/2, 4*C]\n\n        x = self.norm(x)\n        x = self.reduction(x)  # [B, H/2*W/2, 2*C]\n\n        return x\n\n\nclass Mlp(nn.Module):\n    \"\"\" MLP as used in Vision Transformer, MLP-Mixer and related networks\n    \"\"\"\n    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n\n        self.fc1 = nn.Linear(in_features, hidden_features)\n        self.act = act_layer()\n        self.drop1 = nn.Dropout(drop)\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop2 = nn.Dropout(drop)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n      
  x = self.drop1(x)\n        x = self.fc2(x)\n        x = self.drop2(x)\n        return x\n\n\nclass WindowAttention(nn.Module):\n    r\"\"\" Window based multi-head self attention (W-MSA) module with relative position bias.\n    It supports both of shifted and non-shifted window.\n\n    Args:\n        dim (int): Number of input channels.\n        window_size (tuple[int]): The height and width of the window.\n        num_heads (int): Number of attention heads.\n        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True\n        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0\n        proj_drop (float, optional): Dropout ratio of output. Default: 0.0\n    \"\"\"\n\n    def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.):\n\n        super().__init__()\n        self.dim = dim\n        self.window_size = window_size  # [Mh, Mw]\n        self.num_heads = num_heads\n        head_dim = dim // num_heads\n        self.scale = head_dim ** -0.5\n\n        # define a parameter table of relative position bias\n        self.relative_position_bias_table = nn.Parameter(\n            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # [2*Mh-1 * 2*Mw-1, nH]\n\n        # get pair-wise relative position index for each token inside the window\n        coords_h = torch.arange(self.window_size[0])\n        coords_w = torch.arange(self.window_size[1])\n        coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing=\"ij\"))  # [2, Mh, Mw]\n        coords_flatten = torch.flatten(coords, 1)  # [2, Mh*Mw]\n        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw]\n        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # [2, Mh*Mw, Mh*Mw]\n        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # [Mh*Mw, Mh*Mw, 2]\n        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0\n        
relative_coords[:, :, 1] += self.window_size[1] - 1\n        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1\n        relative_position_index = relative_coords.sum(-1)  # [Mh*Mw, Mh*Mw]\n        self.register_buffer(\"relative_position_index\", relative_position_index)\n\n        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop)\n        self.proj = nn.Linear(dim, dim)\n        self.proj_drop = nn.Dropout(proj_drop)\n\n        nn.init.trunc_normal_(self.relative_position_bias_table, std=.02)\n        self.softmax = nn.Softmax(dim=-1)\n\n    def forward(self, x, mask: Optional[torch.Tensor] = None):\n        \"\"\"\n        Args:\n            x: input features with shape of (num_windows*B, Mh*Mw, C)\n            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None\n        \"\"\"\n        # [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        B_, N, C = x.shape\n        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]\n        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]\n        # permute: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)\n        # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)\n\n        # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw]\n        # @: multiply -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw]\n        q = q * self.scale\n        attn = (q @ k.transpose(-2, -1))\n\n        # relative_position_bias_table.view: [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH]\n        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(\n            self.window_size[0] * self.window_size[1], self.window_size[0] * 
self.window_size[1], -1)\n        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # [nH, Mh*Mw, Mh*Mw]\n        attn = attn + relative_position_bias.unsqueeze(0)\n\n        if mask is not None:\n            # mask: [nW, Mh*Mw, Mh*Mw]\n            nW = mask.shape[0]  # num_windows\n            # attn.view: [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]\n            # mask.unsqueeze: [1, nW, 1, Mh*Mw, Mh*Mw]\n            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)\n            attn = attn.view(-1, self.num_heads, N, N)\n            attn = self.softmax(attn)\n        else:\n            attn = self.softmax(attn)\n\n        attn = self.attn_drop(attn)\n\n        # @: multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]\n        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n\nclass SwinTransformerBlock(nn.Module):\n    r\"\"\" Swin Transformer Block.\n\n    Args:\n        dim (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n        window_size (int): Window size.\n        shift_size (int): Shift size for SW-MSA.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float, optional): Stochastic depth rate. Default: 0.0\n        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU\n        norm_layer (nn.Module, optional): Normalization layer.  
Default: nn.LayerNorm\n    \"\"\"\n\n    def __init__(self, dim, num_heads, window_size=7, shift_size=0,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,\n                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):\n        super().__init__()\n        self.dim = dim\n        self.num_heads = num_heads\n        self.window_size = window_size\n        self.shift_size = shift_size\n        self.mlp_ratio = mlp_ratio\n        assert 0 <= self.shift_size < self.window_size, \"shift_size must in 0-window_size\"\n\n        self.norm1 = norm_layer(dim)\n        self.attn = WindowAttention(\n            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads, qkv_bias=qkv_bias,\n            attn_drop=attn_drop, proj_drop=drop)\n\n        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()\n        self.norm2 = norm_layer(dim)\n        mlp_hidden_dim = int(dim * mlp_ratio)\n        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)\n\n    def forward(self, x, attn_mask):\n        H, W = self.H, self.W\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        shortcut = x\n        x = self.norm1(x)\n        x = x.view(B, H, W, C)\n\n        # pad feature maps to multiples of window size\n        # 把feature map给pad到window size的整数倍\n        pad_l = pad_t = 0\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\n        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))\n        _, Hp, Wp, _ = x.shape\n\n        # cyclic shift\n        if self.shift_size > 0:\n            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))\n        else:\n            shifted_x = x\n            attn_mask = None\n\n        # partition windows\n        x_windows = window_partition(shifted_x, 
self.window_size)  # [nW*B, Mh, Mw, C]\n        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # [nW*B, Mh*Mw, C]\n\n        # W-MSA/SW-MSA\n        attn_windows = self.attn(x_windows, mask=attn_mask)  # [nW*B, Mh*Mw, C]\n\n        # merge windows\n        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)  # [nW*B, Mh, Mw, C]\n        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]\n\n        # reverse cyclic shift\n        if self.shift_size > 0:\n            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))\n        else:\n            x = shifted_x\n\n        if pad_r > 0 or pad_b > 0:\n            # 把前面pad的数据移除掉\n            x = x[:, :H, :W, :].contiguous()\n\n        x = x.view(B, H * W, C)\n\n        # FFN\n        x = shortcut + self.drop_path(x)\n        x = x + self.drop_path(self.mlp(self.norm2(x)))\n\n        return x\n\n\nclass BasicLayer(nn.Module):\n    \"\"\"\n    A basic Swin Transformer layer for one stage.\n\n    Args:\n        dim (int): Number of input channels.\n        depth (int): Number of blocks.\n        num_heads (int): Number of attention heads.\n        window_size (int): Local window size.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0\n        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm\n        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None\n        use_checkpoint (bool): Whether to use checkpointing to save memory. 
Default: False.\n    \"\"\"\n\n    def __init__(self, dim, depth, num_heads, window_size,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,\n                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):\n        super().__init__()\n        self.dim = dim\n        self.depth = depth\n        self.window_size = window_size\n        self.use_checkpoint = use_checkpoint\n        self.shift_size = window_size // 2\n\n        # build blocks\n        self.blocks = nn.ModuleList([\n            SwinTransformerBlock(\n                dim=dim,\n                num_heads=num_heads,\n                window_size=window_size,\n                shift_size=0 if (i % 2 == 0) else self.shift_size,\n                mlp_ratio=mlp_ratio,\n                qkv_bias=qkv_bias,\n                drop=drop,\n                attn_drop=attn_drop,\n                drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,\n                norm_layer=norm_layer)\n            for i in range(depth)])\n\n        # patch merging layer\n        if downsample is not None:\n            self.downsample = downsample(dim=dim, norm_layer=norm_layer)\n        else:\n            self.downsample = None\n\n    def create_mask(self, x, H, W):\n        # calculate attention mask for SW-MSA\n        # 保证Hp和Wp是window_size的整数倍\n        Hp = int(np.ceil(H / self.window_size)) * self.window_size\n        Wp = int(np.ceil(W / self.window_size)) * self.window_size\n        # 拥有和feature map一样的通道排列顺序，方便后续window_partition\n        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # [1, Hp, Wp, 1]\n        h_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n        w_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n        cnt = 0\n        for 
h in h_slices:\n            for w in w_slices:\n                img_mask[:, h, w, :] = cnt\n                cnt += 1\n\n        mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]\n        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)  # [nW, Mh*Mw]\n        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)  # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]\n        # [nW, Mh*Mw, Mh*Mw]\n        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))\n        return attn_mask\n\n    def forward(self, x, H, W):\n        attn_mask = self.create_mask(x, H, W)  # [nW, Mh*Mw, Mh*Mw]\n        for blk in self.blocks:\n            blk.H, blk.W = H, W\n            if not torch.jit.is_scripting() and self.use_checkpoint:\n                x = checkpoint.checkpoint(blk, x, attn_mask)\n            else:\n                x = blk(x, attn_mask)\n        if self.downsample is not None:\n            x = self.downsample(x, H, W)\n            H, W = (H + 1) // 2, (W + 1) // 2\n\n        return x, H, W\n\n\nclass SwinTransformer(nn.Module):\n    r\"\"\" Swin Transformer\n        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -\n          https://arxiv.org/pdf/2103.14030\n\n    Args:\n        patch_size (int | tuple(int)): Patch size. Default: 4\n        in_chans (int): Number of input image channels. Default: 3\n        num_classes (int): Number of classes for classification head. Default: 1000\n        embed_dim (int): Patch embedding dimension. Default: 96\n        depths (tuple(int)): Depth of each Swin Transformer layer.\n        num_heads (tuple(int)): Number of attention heads in different layers.\n        window_size (int): Window size. Default: 7\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4\n        qkv_bias (bool): If True, add a learnable bias to query, key, value. 
Default: True\n        drop_rate (float): Dropout rate. Default: 0\n        attn_drop_rate (float): Attention dropout rate. Default: 0\n        drop_path_rate (float): Stochastic depth rate. Default: 0.1\n        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.\n        patch_norm (bool): If True, add normalization after patch embedding. Default: True\n        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False\n    \"\"\"\n\n    def __init__(self, patch_size=4, in_chans=3, num_classes=1000,\n                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),\n                 window_size=7, mlp_ratio=4., qkv_bias=True,\n                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,\n                 norm_layer=nn.LayerNorm, patch_norm=True,\n                 use_checkpoint=False, **kwargs):\n        super().__init__()\n\n        self.num_classes = num_classes\n        self.num_layers = len(depths)\n        self.embed_dim = embed_dim\n        self.patch_norm = patch_norm\n        # stage4输出特征矩阵的channels\n        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))\n        self.mlp_ratio = mlp_ratio\n\n        # split image into non-overlapping patches\n        self.patch_embed = PatchEmbed(\n            patch_size=patch_size, in_c=in_chans, embed_dim=embed_dim,\n            norm_layer=norm_layer if self.patch_norm else None)\n        self.pos_drop = nn.Dropout(p=drop_rate)\n\n        # stochastic depth\n        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule\n\n        # build layers\n        self.layers = nn.ModuleList()\n        for i_layer in range(self.num_layers):\n            # 注意这里构建的stage和论文图中有些差异\n            # 这里的stage不包含该stage的patch_merging层，包含的是下个stage的\n            layers = BasicLayer(dim=int(embed_dim * 2 ** i_layer),\n                                depth=depths[i_layer],\n                                
num_heads=num_heads[i_layer],\n                                window_size=window_size,\n                                mlp_ratio=self.mlp_ratio,\n                                qkv_bias=qkv_bias,\n                                drop=drop_rate,\n                                attn_drop=attn_drop_rate,\n                                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],\n                                norm_layer=norm_layer,\n                                downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,\n                                use_checkpoint=use_checkpoint)\n            self.layers.append(layers)\n\n        self.norm = norm_layer(self.num_features)\n        self.avgpool = nn.AdaptiveAvgPool1d(1)\n        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()\n\n        self.apply(self._init_weights)\n\n    def _init_weights(self, m):\n        if isinstance(m, nn.Linear):\n            nn.init.trunc_normal_(m.weight, std=.02)\n            if isinstance(m, nn.Linear) and m.bias is not None:\n                nn.init.constant_(m.bias, 0)\n        elif isinstance(m, nn.LayerNorm):\n            nn.init.constant_(m.bias, 0)\n            nn.init.constant_(m.weight, 1.0)\n\n    def forward(self, x):\n        # x: [B, L, C]\n        x, H, W = self.patch_embed(x)\n        x = self.pos_drop(x)\n\n        for layer in self.layers:\n            x, H, W = layer(x, H, W)\n\n        x = self.norm(x)  # [B, L, C]\n        x = self.avgpool(x.transpose(1, 2))  # [B, C, 1]\n        x = torch.flatten(x, 1)\n        x = self.head(x)\n        return x\n\n\ndef swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n         
                   embed_dim=96,\n                            depths=(2, 2, 6, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=96,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n         
                   **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    # trained ImageNet-22K\n    
# https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            **kwargs)\n    return model\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom model import swin_tiny_patch4_window7_224 as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    img_size = 224\n    data_transform = transforms.Compose(\n        [transforms.Resize(int(img_size * 1.14)),\n         transforms.CenterCrop(img_size),\n         transforms.ToTensor(),\n         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=5).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-9.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == 
'__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/select_incorrect_samples.py",
    "content": "\"\"\"\n该脚本能够把验证集中预测错误的图片挑选出来，并记录在record.txt中\n\"\"\"\nimport os\nimport json\nimport argparse\nimport sys\n\nimport torch\nfrom torchvision import transforms\nfrom tqdm import tqdm\n\nfrom my_dataset import MyDataSet\nfrom model import swin_base_patch4_window12_384_in22k as create_model\nfrom utils import read_split_data\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    _, _, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = 384\n    data_transform = {\n        \"val\": transforms.Compose([transforms.Resize(int(img_size * 1.143)),\n                                   transforms.CenterCrop(img_size),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes).to(device)\n\n    assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n    model.load_state_dict(torch.load(args.weights, map_location=device))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert 
os.path.exists(json_path), \"file: '{}' does not exist.\".format(json_path)\n\n    json_file = open(json_path, \"r\")\n    class_indict = json.load(json_file)\n\n    model.eval()\n    with torch.no_grad():\n        with open(\"record.txt\", \"w\") as f:\n            # validate\n            data_loader = tqdm(val_loader, file=sys.stdout)\n            for step, data in enumerate(data_loader):\n                images, labels = data\n                pred = model(images.to(device))\n                pred_classes = torch.max(pred, dim=1)[1]\n                contrast = torch.eq(pred_classes, labels.to(device)).tolist()\n                labels = labels.tolist()\n                pred_classes = pred_classes.tolist()\n                for i, flag in enumerate(contrast):\n                    if flag is False:\n                        file_name = val_images_path[batch_size * step + i]\n                        true_label = class_indict[str(labels[i])]\n                        false_label = class_indict[str(pred_classes[i])]\n                        f.write(f\"{file_name}  TrueLabel:{true_label}  PredictLabel:{false_label}\\n\")\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--batch-size', type=int, default=2)\n\n    # 数据集所在根目录\n    # http://download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 训练权重路径\n    parser.add_argument('--weights', type=str, default='./weights/model-19.pth',\n                        help='initial weights path')\n    # device used for inference\n    parser.add_argument('--device', default='cuda:0', help='device id (e.g. cuda:0 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/train.py",
    "content": "import os\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom my_dataset import MyDataSet\nfrom model import swin_tiny_patch4_window7_224 as create_model\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    tb_writer = SummaryWriter()\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    img_size = 224\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(img_size),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(int(img_size * 1.143)),\n                                   transforms.CenterCrop(img_size),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = 
torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes).to(device)\n\n    if args.weights != \"\":\n        assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n        weights_dict = torch.load(args.weights, map_location=device)[\"model\"]\n        # 删除有关分类类别的权重\n        for k in list(weights_dict.keys()):\n            if \"head\" in k:\n                del weights_dict[k]\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除head外，其他权重全部冻结\n            if \"head\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.AdamW(pg, lr=args.lr, weight_decay=5E-2)\n\n    for epoch in range(args.epochs):\n        # train\n        train_loss, train_acc = train_one_epoch(model=model,\n                                                optimizer=optimizer,\n                                                data_loader=train_loader,\n                                                device=device,\n    
                                            epoch=epoch)\n\n        # validate\n        val_loss, val_acc = evaluate(model=model,\n                                     data_loader=val_loader,\n                                     device=device,\n                                     epoch=epoch)\n\n        tags = [\"train_loss\", \"train_acc\", \"val_loss\", \"val_acc\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], train_loss, epoch)\n        tb_writer.add_scalar(tags[1], train_acc, epoch)\n        tb_writer.add_scalar(tags[2], val_loss, epoch)\n        tb_writer.add_scalar(tags[3], val_acc, epoch)\n        tb_writer.add_scalar(tags[4], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=10)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=0.0001)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n\n    # 预训练权重路径，如果不想载入就设置为空字符\n    parser.add_argument('--weights', type=str, default='./swin_tiny_patch4_window7_224.pth',\n                        help='initial weights path')\n    # 是否冻结权重\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/swin_transformer/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = 
model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/data_utils.py",
    "content": "import os\nimport json\nimport pickle\nimport random\n\nfrom PIL import Image\nimport torch\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\".format(sum(every_class_num)))\n    print(\"{} 
images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef plot_class_preds(net,\n                     
images_dir: str,\n                     transform,\n                     num_plot: int = 5,\n                     device=\"cpu\"):\n    if not os.path.exists(images_dir):\n        print(\"not found {} path, ignore add figure.\".format(images_dir))\n        return None\n\n    label_path = os.path.join(images_dir, \"label.txt\")\n    if not os.path.exists(label_path):\n        print(\"not found {} file, ignore add figure\".format(label_path))\n        return None\n\n    # read class_indict\n    json_label_path = './class_indices.json'\n    assert os.path.exists(json_label_path), \"not found {}\".format(json_label_path)\n    json_file = open(json_label_path, 'r')\n    # {\"0\": \"daisy\"}\n    flower_class = json.load(json_file)\n    # {\"daisy\": \"0\"}\n    class_indices = dict((v, k) for k, v in flower_class.items())\n\n    # reading label.txt file\n    label_info = []\n    with open(label_path, \"r\") as rd:\n        for line in rd.readlines():\n            line = line.strip()\n            if len(line) > 0:\n                split_info = [i for i in line.split(\" \") if len(i) > 0]\n                assert len(split_info) == 2, \"label format error, expect file_name and class_name\"\n                image_name, class_name = split_info\n                image_path = os.path.join(images_dir, image_name)\n                # 如果文件不存在，则跳过\n                if not os.path.exists(image_path):\n                    print(\"not found {}, skip.\".format(image_path))\n                    continue\n                # 如果读取的类别不在给定的类别内，则跳过\n                if class_name not in class_indices.keys():\n                    print(\"unrecognized category {}, skip\".format(class_name))\n                    continue\n                label_info.append([image_path, class_name])\n\n    if len(label_info) == 0:\n        return None\n\n    # get first num_plot info\n    if len(label_info) > num_plot:\n        label_info = label_info[:num_plot]\n\n    num_imgs = len(label_info)\n    images = []\n    
labels = []\n    for img_path, class_name in label_info:\n        # read img\n        img = Image.open(img_path).convert(\"RGB\")\n        label_index = int(class_indices[class_name])\n\n        # preprocessing\n        img = transform(img)\n        images.append(img)\n        labels.append(label_index)\n\n    # batching images\n    images = torch.stack(images, dim=0).to(device)\n\n    # inference\n    with torch.no_grad():\n        output = net(images)\n        probs, preds = torch.max(torch.softmax(output, dim=1), dim=1)\n        probs = probs.cpu().numpy()\n        preds = preds.cpu().numpy()\n\n    # width, height\n    fig = plt.figure(figsize=(num_imgs * 2.5, 3), dpi=100)\n    for i in range(num_imgs):\n        # 1：子图共1行，num_imgs:子图共num_imgs列，当前绘制第i+1个子图\n        ax = fig.add_subplot(1, num_imgs, i+1, xticks=[], yticks=[])\n\n        # CHW -> HWC\n        npimg = images[i].cpu().numpy().transpose(1, 2, 0)\n\n        # 将图像还原至标准化之前\n        # mean:[0.485, 0.456, 0.406], std:[0.229, 0.224, 0.225]\n        npimg = (npimg * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n        plt.imshow(npimg.astype('uint8'))\n\n        title = \"{}, {:.2f}%\\n(label: {})\".format(\n            flower_class[str(preds[i])],  # predict class\n            probs[i] * 100,  # predict probability\n            flower_class[str(labels[i])]  # true class\n        )\n        ax.set_title(title, color=(\"green\" if preds[i] == labels[i] else \"red\"))\n\n    return fig\n\n\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/my_dataset.py",
    "content": "from tqdm import tqdm\nfrom PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n        delete_img = []\n        for index, img_path in tqdm(enumerate(images_path)):\n            img = Image.open(img_path)\n            w, h = img.size\n            ratio = w / h\n            if ratio > 10 or ratio < 0.1:\n                delete_img.append(index)\n                # print(img_path, ratio)\n\n        for index in delete_img[::-1]:\n            self.images_path.pop(index)\n            self.images_class.pop(index)\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/requirements.txt",
    "content": "torchvision==0.14.1\ntqdm==4.42.1\nmatplotlib==3.2.1\ntorch==1.13.1\nPillow\ntensorboard\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import resnet34\nfrom my_dataset import MyDataSet\nfrom data_utils import read_split_data, plot_class_preds\nfrom train_eval_utils import train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    # 实例化SummaryWriter对象\n    tb_writer = SummaryWriter(log_dir=\"runs/flower_experiment\")\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    # 划分数据为训练集和验证集\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    # 定义训练以及预测时的预处理方法\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_data_set = MyDataSet(images_path=train_images_path,\n                               images_class=train_images_label,\n                               transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_data_set = MyDataSet(images_path=val_images_path,\n                             images_class=val_images_label,\n                             
transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    # 计算使用num_workers的数量\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_data_set,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_data_set.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_data_set,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_data_set.collate_fn)\n\n    # 实例化模型\n    model = resnet34(num_classes=args.num_classes).to(device)\n\n    # 将模型写入tensorboard\n    init_img = torch.zeros((1, 3, 224, 224), device=device)\n    tb_writer.add_graph(model, init_img)\n\n    # 如果存在预训练权重则载入\n    if os.path.exists(args.weights):\n        weights_dict = torch.load(args.weights, map_location=device)\n        load_weights_dict = {k: v for k, v in weights_dict.items()\n                             if model.state_dict()[k].numel() == v.numel()}\n        model.load_state_dict(load_weights_dict, strict=False)\n    else:\n        print(\"not using pretrain-weights.\")\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        print(\"freeze layers except fc layer.\")\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                para.requires_grad_(False)\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n 
   optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n        # update learning rate\n        scheduler.step()\n\n        # validate\n        acc = evaluate(model=model,\n                       data_loader=val_loader,\n                       device=device)\n\n        # add loss, acc and lr into tensorboard\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"train_loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        # add figure into tensorboard\n        fig = plot_class_preds(net=model,\n                               images_dir=\"./plot_img\",\n                               transform=data_transform[\"val\"],\n                               num_plot=5,\n                               device=device)\n        if fig is not None:\n            tb_writer.add_figure(\"predictions vs. 
actuals\",\n                                 figure=fig,\n                                 global_step=epoch)\n\n        # add conv1 weights into tensorboard\n        tb_writer.add_histogram(tag=\"conv1\",\n                                values=model.conv1.weight,\n                                global_step=epoch)\n        tb_writer.add_histogram(tag=\"layer1/block0/conv1\",\n                                values=model.layer1[0].conv1.weight,\n                                global_step=epoch)\n\n        # save weights\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.1)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    img_root = \"/home/wz/my_project/my_github/data_set/flower_data/flower_photos\"\n    parser.add_argument('--data-path', type=str, default=img_root)\n\n    # resnet34 官方权重下载地址\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    parser.add_argument('--weights', type=str, default='resNet34.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/tensorboard_test/train_eval_utils.py",
    "content": "import sys\n\nfrom tqdm import tqdm\nimport torch\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        # 打印平均loss\n        data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n    # 统计验证集样本总数目\n    num_samples = len(data_loader.dataset)\n\n    # 打印验证进度\n    data_loader = tqdm(data_loader, desc=\"validation...\", file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    # 计算预测正确的比例\n    acc = sum_num.item() / num_samples\n\n    return acc\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/README.md",
    "content": "## 多GPU启动指令\n- 如果要使用```train_multi_gpu_using_launch.py```脚本，使用以下指令启动\n- ```python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_gpu_using_launch.py```\n- 其中```nproc_per_node```为并行GPU的数量\n- 如果要指定使用某几块GPU可使用如下指令，例如使用第1块和第4块GPU进行训练：\n- ```CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_gpu_using_launch.py```\n\n-----\n\n- 如果要使用```train_multi_gpu_using_spawn.py```脚本，使用以下指令启动\n- ```python train_multi_gpu_using_spawn.py```\n\n## 训练时间对比\n![training time](training_time.png)\n\n## 是否使用SyncBatchNorm\n![syncbn](syncbn.png)\n\n## 单GPU与多GPU训练曲线\n![accuracy](accuracy.png)\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n    
    self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/multi_train_utils/distributed_utils.py",
    "content": "import os\n\nimport torch\nimport torch.distributed as dist\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'  # 通信后端，nvidia GPU推荐使用NCCL\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                            world_size=args.world_size, rank=args.rank)\n    dist.barrier()\n\n\ndef cleanup():\n    dist.destroy_process_group()\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef reduce_value(value, average=True):\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return value\n\n    with torch.no_grad():\n        dist.all_reduce(value)\n        if average:\n            value /= world_size\n\n        return value\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/multi_train_utils/train_eval_utils.py",
    "content": "import sys\n\nfrom tqdm import tqdm\nimport torch\n\nfrom multi_train_utils.distributed_utils import reduce_value, is_main_process\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    mean_loss = torch.zeros(1).to(device)\n    optimizer.zero_grad()\n\n    # 在进程0中打印训练进度\n    if is_main_process():\n        data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n\n        pred = model(images.to(device))\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        loss = reduce_value(loss, average=True)\n        mean_loss = (mean_loss * step + loss.detach()) / (step + 1)  # update mean losses\n\n        # 在进程0中打印平均loss\n        if is_main_process():\n            data_loader.desc = \"[epoch {}] mean loss {}\".format(epoch, round(mean_loss.item(), 3))\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    # 等待所有进程计算完毕\n    if device != torch.device(\"cpu\"):\n        torch.cuda.synchronize(device)\n\n    return mean_loss.item()\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    model.eval()\n\n    # 用于存储预测正确的样本个数\n    sum_num = torch.zeros(1).to(device)\n\n    # 在进程0中打印验证进度\n    if is_main_process():\n        data_loader = tqdm(data_loader, file=sys.stdout)\n\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        pred = model(images.to(device))\n        pred = torch.max(pred, dim=1)[1]\n        sum_num += torch.eq(pred, labels.to(device)).sum()\n\n    # 等待所有进程计算完毕\n    if device != torch.device(\"cpu\"):\n        torch.cuda.synchronize(device)\n\n    sum_num = reduce_value(sum_num, average=False)\n\n    return sum_num.item()\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/plot_results.py",
    "content": "import math\nimport matplotlib.pyplot as plt\n\nx = [0, 1, 2, 3]\ny = [9, 5.5, 3, 2]\n\nplt.bar(x, y, align='center')\nplt.xticks(range(len(x)), ['One-GPU', '2 GPUs', '4 GPUs', '8 GPUs'])\nplt.ylim((0, 10))\nfor i, v in enumerate(y):\n    plt.text(x=i, y=v + 0.1, s=str(v) + ' s', ha='center')\nplt.xlabel('Using number of GPU device')\nplt.ylabel('Training time per epoch (second)')\nplt.show()\nplt.close()\n\nx = list(range(30))\nno_SyncBatchNorm = [0.348, 0.495, 0.587, 0.554, 0.637,\n                    0.622, 0.689, 0.673, 0.702, 0.717,\n                    0.717, 0.69, 0.716, 0.696, 0.738,\n                    0.75, 0.75, 0.66, 0.713, 0.758,\n                    0.777, 0.777, 0.769, 0.792, 0.802,\n                    0.807, 0.807, 0.804, 0.812, 0.811]\n\nSyncBatchNorm = [0.283, 0.514, 0.531, 0.654, 0.671,\n                 0.591, 0.621, 0.685, 0.701, 0.732,\n                 0.701, 0.74, 0.667, 0.723, 0.745,\n                 0.679, 0.738, 0.772, 0.764, 0.765,\n                 0.764, 0.791, 0.818, 0.791, 0.807,\n                 0.806, 0.811, 0.821, 0.833, 0.81]\n\nplt.plot(x, no_SyncBatchNorm, label=\"No SyncBatchNorm\")\nplt.plot(x, SyncBatchNorm, label=\"SyncBatchNorm\")\nplt.xlabel('Training epochs')\nplt.ylabel('Accuracy')\nplt.legend()\nplt.show()\nplt.close()\n\n\nx = list(range(30))\nsingle_gpu = [0.569, 0.576, 0.654, 0.648, 0.609,\n              0.637, 0.699, 0.709, 0.715, 0.715,\n              0.717, 0.724, 0.722, 0.731, 0.721,\n              0.774, 0.751, 0.787, 0.78, 0.77,\n              0.763, 0.803, 0.754, 0.796, 0.799,\n              0.815, 0.793, 0.808, 0.811, 0.806]\nplt.plot(x, single_gpu, color=\"black\", label=\"Single GPU\")\nplt.plot(x, no_SyncBatchNorm, label=\"No SyncBatchNorm\")\nplt.plot(x, SyncBatchNorm, label=\"SyncBatchNorm\")\nplt.xlabel('Training epochs')\nplt.ylabel('Accuracy')\nplt.legend()\nplt.show()\nplt.close()\n\n\n# epochs = 30\n# lrf = 0.1\n# lf0 = lambda x: math.cos(x * math.pi / epochs)\n# lf1 = lambda x: 
1 + math.cos(x * math.pi / epochs)\n# lf2 = lambda x: (1 + math.cos(x * math.pi / epochs)) / 2\n# lf3 = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf\n# x = range(epochs)\n# y0 = [lf0(epoch) for epoch in x]\n# y1 = [lf1(epoch) for epoch in x]\n# y2 = [lf2(epoch) for epoch in x]\n# y3 = [lf3(epoch) for epoch in x]\n# plt.subplot(2, 2, 1)\n# plt.plot(x, y0)\n# plt.hlines(1, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.hlines(-1, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.xlim((0, epochs-1))\n#\n# plt.subplot(2, 2, 2)\n# plt.plot(x, y1)\n# plt.hlines(2, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.hlines(0, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.xlim((0, epochs-1))\n#\n# plt.subplot(2, 2, 3)\n# plt.plot(x, y2)\n# plt.hlines(1, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.hlines(0, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.xlim((0, epochs-1))\n#\n# plt.subplot(2, 2, 4)\n# plt.plot(x, y3)\n# plt.hlines(1, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.hlines(lrf, 0, epochs-1, colors=\"r\", linestyles=\"dashed\")\n# plt.text(epochs-1, y3[-1], \"{}\".format(round(y3[-1], 1)))\n# plt.xlim((0, epochs-1))\n#\n# plt.show()\n# plt.close()\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/requirements.txt",
    "content": "matplotlib==3.2.1\ntqdm==4.42.1\ntorchvision==0.14.1\ntorch==1.13.1\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/train_multi_gpu_using_launch.py",
    "content": "import os\nimport math\nimport tempfile\nimport argparse\n\nimport torch\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom model import resnet34\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, plot_data_loader_image\nfrom multi_train_utils.distributed_utils import init_distributed_mode, dist, cleanup\nfrom multi_train_utils.train_eval_utils import train_one_epoch, evaluate\n\n\ndef main(args):\n    if torch.cuda.is_available() is False:\n        raise EnvironmentError(\"not find GPU device for training.\")\n\n    # 初始化各进程环境\n    init_distributed_mode(args=args)\n\n    rank = args.rank\n    device = torch.device(args.device)\n    batch_size = args.batch_size\n    weights_path = args.weights\n    args.lr *= args.world_size  # 学习率要根据并行GPU的数量进行倍增\n    checkpoint_path = \"\"\n\n    if rank == 0:  # 在第一个进程中打印信息，并实例化tensorboard\n        print(args)\n        print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n        tb_writer = SummaryWriter()\n        if os.path.exists(\"./weights\") is False:\n            os.makedirs(\"./weights\")\n\n    train_info, val_info, num_classes = read_split_data(args.data_path)\n    train_images_path, train_images_label = train_info\n    val_images_path, val_images_label = val_info\n\n    # check num_classes\n    assert args.num_classes == num_classes, \"dataset num_classes: {}, input {}\".format(args.num_classes,\n                                                                                       num_classes)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 
0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_data_set = MyDataSet(images_path=train_images_path,\n                               images_class=train_images_label,\n                               transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_data_set = MyDataSet(images_path=val_images_path,\n                             images_class=val_images_label,\n                             transform=data_transform[\"val\"])\n\n    # 给每个rank对应的进程分配训练的样本索引\n    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)\n    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)\n\n    # 将样本索引每batch_size个元素组成一个list\n    train_batch_sampler = torch.utils.data.BatchSampler(\n        train_sampler, batch_size, drop_last=True)\n\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    if rank == 0:\n        print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_data_set,\n                                               batch_sampler=train_batch_sampler,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_data_set.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_data_set,\n                                             batch_size=batch_size,\n                                             sampler=val_sampler,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             
collate_fn=val_data_set.collate_fn)\n    # 实例化模型\n    model = resnet34(num_classes=num_classes).to(device)\n\n    # 如果存在预训练权重则载入\n    if os.path.exists(weights_path):\n        weights_dict = torch.load(weights_path, map_location=device)\n        load_weights_dict = {k: v for k, v in weights_dict.items()\n                             if model.state_dict()[k].numel() == v.numel()}\n        model.load_state_dict(load_weights_dict, strict=False)\n    else:\n        checkpoint_path = os.path.join(tempfile.gettempdir(), \"initial_weights.pt\")\n        # 如果不存在预训练权重，需要将第一个进程中的权重保存，然后其他进程载入，保持初始化权重一致\n        if rank == 0:\n            torch.save(model.state_dict(), checkpoint_path)\n\n        dist.barrier()\n        # 这里注意，一定要指定map_location参数，否则会导致第一块GPU占用更多资源\n        model.load_state_dict(torch.load(checkpoint_path, map_location=device))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                para.requires_grad_(False)\n    else:\n        # 只有训练带有BN结构的网络时使用SyncBatchNorm采用意义\n        if args.syncBN:\n            # 使用SyncBatchNorm后训练会更耗时\n            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)\n\n    # 转为DDP模型\n    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n\n    # optimizer\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        train_sampler.set_epoch(epoch)\n\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n   
                                 device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        sum_num = evaluate(model=model,\n                           data_loader=val_loader,\n                           device=device)\n        acc = sum_num / val_sampler.total_size\n\n        if rank == 0:\n            print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n            tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n            tb_writer.add_scalar(tags[0], mean_loss, epoch)\n            tb_writer.add_scalar(tags[1], acc, epoch)\n            tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n            torch.save(model.module.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n    # 删除临时缓存文件\n    if rank == 0:\n        if os.path.exists(checkpoint_path) is True:\n            os.remove(checkpoint_path)\n\n    cleanup()\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.1)\n    # 是否启用SyncBatchNorm\n    parser.add_argument('--syncBN', type=bool, default=True)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str, default=\"/home/wz/data_set/flower_data/flower_photos\")\n\n    # resnet34 官方权重下载地址\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    parser.add_argument('--weights', type=str, default='resNet34.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    # 不要改该参数，系统会自动分配\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 
0 or 0,1 or cpu)')\n    # 开启的进程数(注意不是线程),不用设置该参数，会根据nproc_per_node自动设置\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/train_multi_gpu_using_spawn.py",
    "content": "import os\nimport math\nimport tempfile\nimport argparse\n\nimport torch\nimport torch.multiprocessing as mp\nfrom torch.multiprocessing import Process\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\nfrom model import resnet34\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data, plot_data_loader_image\nfrom multi_train_utils.distributed_utils import dist, cleanup\nfrom multi_train_utils.train_eval_utils import train_one_epoch, evaluate\n\n\ndef main_fun(rank, world_size, args):\n    if torch.cuda.is_available() is False:\n        raise EnvironmentError(\"not find GPU device for training.\")\n\n    # 初始化各进程环境 start\n    os.environ[\"MASTER_ADDR\"] = \"localhost\"\n    os.environ[\"MASTER_PORT\"] = \"12355\"\n\n    args.rank = rank\n    args.world_size = world_size\n    args.gpu = rank\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                            world_size=args.world_size, rank=args.rank)\n    dist.barrier()\n    # 初始化各进程环境 end\n\n    rank = args.rank\n    device = torch.device(args.device)\n    batch_size = args.batch_size\n    weights_path = args.weights\n    args.lr *= args.world_size  # 学习率要根据并行GPU的数量进行倍增\n    checkpoint_path = \"\"\n\n    if rank == 0:  # 在第一个进程中打印信息，并实例化tensorboard\n        print(args)\n        print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n        tb_writer = SummaryWriter()\n        if os.path.exists(\"./weights\") is False:\n            os.makedirs(\"./weights\")\n\n    train_info, val_info, num_classes = read_split_data(args.data_path)\n    train_images_path, train_images_label = train_info\n  
  val_images_path, val_images_label = val_info\n\n    # check num_classes\n    assert args.num_classes == num_classes, \"dataset num_classes: {}, input {}\".format(args.num_classes,\n                                                                                       num_classes)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_data_set = MyDataSet(images_path=train_images_path,\n                               images_class=train_images_label,\n                               transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_data_set = MyDataSet(images_path=val_images_path,\n                             images_class=val_images_label,\n                             transform=data_transform[\"val\"])\n\n    # 给每个rank对应的进程分配训练的样本索引\n    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)\n    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)\n\n    # 将样本索引每batch_size个元素组成一个list\n    train_batch_sampler = torch.utils.data.BatchSampler(\n        train_sampler, batch_size, drop_last=True)\n\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    if rank == 0:\n        print('Using {} dataloader workers every process'.format(nw))\n\n    train_loader = torch.utils.data.DataLoader(train_data_set,\n                                               
batch_sampler=train_batch_sampler,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_data_set.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_data_set,\n                                             batch_size=batch_size,\n                                             sampler=val_sampler,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_data_set.collate_fn)\n    # 实例化模型\n    model = resnet34(num_classes=num_classes).to(device)\n\n    # 如果存在预训练权重则载入\n    if os.path.exists(weights_path):\n        weights_dict = torch.load(weights_path, map_location=device)\n        load_weights_dict = {k: v for k, v in weights_dict.items()\n                             if model.state_dict()[k].numel() == v.numel()}\n        model.load_state_dict(load_weights_dict, strict=False)\n    else:\n        checkpoint_path = os.path.join(tempfile.gettempdir(), \"initial_weights.pt\")\n        # 如果不存在预训练权重，需要将第一个进程中的权重保存，然后其他进程载入，保持初始化权重一致\n        if rank == 0:\n            torch.save(model.state_dict(), checkpoint_path)\n\n        dist.barrier()\n        # 这里注意，一定要指定map_location参数，否则会导致第一块GPU占用更多资源\n        model.load_state_dict(torch.load(checkpoint_path, map_location=device))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                para.requires_grad_(False)\n    else:\n        # 只有训练带有BN结构的网络时使用SyncBatchNorm采用意义\n        if args.syncBN:\n            # 使用SyncBatchNorm后训练会更耗时\n            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)\n\n    # 转为DDP模型\n    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n\n    # 
optimizer\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        train_sampler.set_epoch(epoch)\n\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        sum_num = evaluate(model=model,\n                           data_loader=val_loader,\n                           device=device)\n        acc = sum_num / val_sampler.total_size\n\n        if rank == 0:\n            print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n            tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n            tb_writer.add_scalar(tags[0], mean_loss, epoch)\n            tb_writer.add_scalar(tags[1], acc, epoch)\n            tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n            torch.save(model.module.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n    # 删除临时缓存文件\n    if rank == 0:\n        if os.path.exists(checkpoint_path) is True:\n            os.remove(checkpoint_path)\n\n    cleanup()\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.1)\n    # 是否启用SyncBatchNorm\n    parser.add_argument('--syncBN', type=bool, 
default=True)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str, default=\"/home/wz/data_set/flower_data/flower_photos\")\n\n    # resnet34 官方权重下载地址\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    parser.add_argument('--weights', type=str, default='resNet34.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    # 不要改该参数，系统会自动分配\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')\n    # 开启的进程数(注意不是线程),在单机中指使用GPU的数量\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    opt = parser.parse_args()\n\n    # when using mp.spawn with the number of dataloader workers set greater than 1,\n    # training and validation wait about 10 seconds before each epoch starts\n\n    # mp.spawn(main_fun,\n    #          args=(opt.world_size, opt),\n    #          nprocs=opt.world_size,\n    #          join=True)\n\n    world_size = opt.world_size\n    processes = []\n    for rank in range(world_size):\n        p = Process(target=main_fun, args=(rank, world_size, opt))\n        p.start()\n        processes.append(p)\n    for p in processes:\n        p.join()\n\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/train_single_gpu.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\nimport torch.optim.lr_scheduler as lr_scheduler\n\nfrom model import resnet34, resnet101\nfrom my_dataset import MyDataSet\nfrom utils import read_split_data\nfrom multi_train_utils.train_eval_utils import train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    print(args)\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter()\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    train_info, val_info, num_classes = read_split_data(args.data_path)\n    train_images_path, train_images_label = train_info\n    val_images_path, val_images_label = val_info\n\n    # check num_classes\n    assert args.num_classes == num_classes, \"dataset num_classes: {}, input {}\".format(args.num_classes,\n                                                                                       num_classes)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}\n\n    # 实例化训练数据集\n    train_data_set = MyDataSet(images_path=train_images_path,\n                               images_class=train_images_label,\n                               
transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_data_set = MyDataSet(images_path=val_images_path,\n                             images_class=val_images_label,\n                             transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = torch.utils.data.DataLoader(train_data_set,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_data_set.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_data_set,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_data_set.collate_fn)\n\n    # 如果存在预训练权重则载入\n    model = resnet34(num_classes=args.num_classes).to(device)\n    if args.weights != \"\":\n        if os.path.exists(args.weights):\n            weights_dict = torch.load(args.weights, map_location=device)\n            load_weights_dict = {k: v for k, v in weights_dict.items()\n                                 if model.state_dict()[k].numel() == v.numel()}\n            print(model.load_state_dict(load_weights_dict, strict=False))\n        else:\n            raise FileNotFoundError(\"not found weights file: {}\".format(args.weights))\n\n    # 是否冻结权重\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除最后的全连接层外，其他权重全部冻结\n            if \"fc\" not in name:\n                
para.requires_grad_(False)\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        mean_loss = train_one_epoch(model=model,\n                                    optimizer=optimizer,\n                                    data_loader=train_loader,\n                                    device=device,\n                                    epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        sum_num = evaluate(model=model,\n                           data_loader=val_loader,\n                           device=device)\n        acc = sum_num / len(val_data_set)\n        print(\"[epoch {}] accuracy: {}\".format(epoch, round(acc, 3)))\n        tags = [\"loss\", \"accuracy\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], mean_loss, epoch)\n        tb_writer.add_scalar(tags[1], acc, epoch)\n        tb_writer.add_scalar(tags[2], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.1)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        
default=\"/home/w180662/my_project/my_github/data_set/flower_data/flower_photos\")\n\n    # resnet34 官方权重下载地址\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    parser.add_argument('--weights', type=str, default='resNet34.pth',\n                        help='initial weights path')\n    parser.add_argument('--freeze-layers', type=bool, default=False)\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/train_multi_GPU/utils.py",
    "content": "import os\nimport json\nimport pickle\nimport random\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    class_names = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    class_names.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(class_names))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in class_names:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\".format(sum(every_class_num)))\n    print(\"{} images for 
training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(class_names)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(class_names)), class_names)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return [train_images_path, train_images_label], [val_images_path, val_images_label], len(class_names)\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') as f:\n        pickle.dump(list_info, f)\n\n\ndef 
read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/README.md",
    "content": "## 代码使用简介\n\n1. 下载好数据集，代码中默认使用的是花分类数据集，下载地址: [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz),\n如果下载不了的话可以通过百度云链接下载: https://pan.baidu.com/s/1QLCTA4sXnQAw_yvxPj9szg 提取码:58p0\n2. 在`train.py`脚本中将`--data-path`设置成解压后的`flower_photos`文件夹绝对路径\n3. 下载预训练权重，在`vit_model.py`文件中每个模型都有提供预训练权重的下载地址，根据自己使用的模型下载对应预训练权重\n4. 在`train.py`脚本中将`--weights`参数设成下载好的预训练权重路径\n5. 设置好数据集的路径`--data-path`以及预训练权重的路径`--weights`就能使用`train.py`脚本开始训练了(训练过程中会自动生成`class_indices.json`文件)\n6. 在`predict.py`脚本中导入和训练脚本中同样的模型，并将`model_weight_path`设置成训练好的模型权重路径(默认保存在weights文件夹下)\n7. 在`predict.py`脚本中将`img_path`设置成你自己需要预测的图片绝对路径\n8. 设置好权重路径`model_weight_path`和预测的图片路径`img_path`就能使用`predict.py`脚本进行预测了\n9. 如果要使用自己的数据集，请按照花分类数据集的文件结构进行摆放(即一个类别对应一个文件夹)，并且将训练以及预测脚本中的`num_classes`设置成你自己数据的类别数\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/flops.py",
    "content": "import torch\nfrom fvcore.nn import FlopCountAnalysis\n\nfrom vit_model import Attention\n\n\ndef main():\n    # Self-Attention\n    a1 = Attention(dim=512, num_heads=1)\n    a1.proj = torch.nn.Identity()  # remove Wo\n\n    # Multi-Head Attention\n    a2 = Attention(dim=512, num_heads=8)\n\n    # [batch_size, num_tokens, total_embed_dim]\n    t = (torch.rand(32, 1024, 512),)\n\n    flops1 = FlopCountAnalysis(a1, t)\n    print(\"Self-Attention FLOPs:\", flops1.total())\n\n    flops2 = FlopCountAnalysis(a2, t)\n    print(\"Multi-Head Attention FLOPs:\", flops2.total())\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/my_dataset.py",
    "content": "from PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass MyDataSet(Dataset):\n    \"\"\"自定义数据集\"\"\"\n\n    def __init__(self, images_path: list, images_class: list, transform=None):\n        self.images_path = images_path\n        self.images_class = images_class\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def __getitem__(self, item):\n        img = Image.open(self.images_path[item])\n        # RGB为彩色图片，L为灰度图片\n        if img.mode != 'RGB':\n            raise ValueError(\"image: {} isn't RGB mode.\".format(self.images_path[item]))\n        label = self.images_class[item]\n\n        if self.transform is not None:\n            img = self.transform(img)\n\n        return img, label\n\n    @staticmethod\n    def collate_fn(batch):\n        # 官方实现的default_collate可以参考\n        # https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py\n        images, labels = tuple(zip(*batch))\n\n        images = torch.stack(images, dim=0)\n        labels = torch.as_tensor(labels)\n        return images, labels\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/predict.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nfrom torchvision import transforms\nimport matplotlib.pyplot as plt\n\nfrom vit_model import vit_base_patch16_224_in21k as create_model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose(\n        [transforms.Resize(256),\n         transforms.CenterCrop(224),\n         transforms.ToTensor(),\n         transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    plt.imshow(img)\n    # [N, C, H, W]\n    img = data_transform(img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=5, has_logits=False).to(device)\n    # load model weights\n    model_weight_path = \"./weights/model-9.pth\"\n    model.load_state_dict(torch.load(model_weight_path, map_location=device))\n    model.eval()\n    with torch.no_grad():\n        # predict class\n        output = torch.squeeze(model(img.to(device))).cpu()\n        predict = torch.softmax(output, dim=0)\n        predict_cla = torch.argmax(predict).numpy()\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_cla)],\n                                                 predict[predict_cla].numpy())\n    plt.title(print_res)\n    for i in range(len(predict)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  predict[i].numpy()))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/train.py",
    "content": "import os\nimport math\nimport argparse\n\nimport torch\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\nfrom torchvision import transforms\n\n\nfrom my_dataset import MyDataSet\nfrom vit_model import vit_base_patch16_224_in21k as create_model\nfrom utils import read_split_data, train_one_epoch, evaluate\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n\n    if os.path.exists(\"./weights\") is False:\n        os.makedirs(\"./weights\")\n\n    tb_writer = SummaryWriter()\n\n    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.RandomResizedCrop(224),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.ToTensor(),\n                                     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),\n        \"val\": transforms.Compose([transforms.Resize(256),\n                                   transforms.CenterCrop(224),\n                                   transforms.ToTensor(),\n                                   transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}\n\n    # 实例化训练数据集\n    train_dataset = MyDataSet(images_path=train_images_path,\n                              images_class=train_images_label,\n                              transform=data_transform[\"train\"])\n\n    # 实例化验证数据集\n    val_dataset = MyDataSet(images_path=val_images_path,\n                            images_class=val_images_label,\n                            transform=data_transform[\"val\"])\n\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using {} dataloader workers every process'.format(nw))\n    train_loader = 
torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               num_workers=nw,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=batch_size,\n                                             shuffle=False,\n                                             pin_memory=True,\n                                             num_workers=nw,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes, has_logits=False).to(device)\n\n    if args.weights != \"\":\n        assert os.path.exists(args.weights), \"weights file: '{}' not exist.\".format(args.weights)\n        weights_dict = torch.load(args.weights, map_location=device)\n        # 删除不需要的权重\n        del_keys = ['head.weight', 'head.bias'] if model.has_logits \\\n            else ['pre_logits.fc.weight', 'pre_logits.fc.bias', 'head.weight', 'head.bias']\n        for k in del_keys:\n            del weights_dict[k]\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    if args.freeze_layers:\n        for name, para in model.named_parameters():\n            # 除head, pre_logits外，其他权重全部冻结\n            if \"head\" not in name and \"pre_logits\" not in name:\n                para.requires_grad_(False)\n            else:\n                print(\"training {}\".format(name))\n\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # 
cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n\n    for epoch in range(args.epochs):\n        # train\n        train_loss, train_acc = train_one_epoch(model=model,\n                                                optimizer=optimizer,\n                                                data_loader=train_loader,\n                                                device=device,\n                                                epoch=epoch)\n\n        scheduler.step()\n\n        # validate\n        val_loss, val_acc = evaluate(model=model,\n                                     data_loader=val_loader,\n                                     device=device,\n                                     epoch=epoch)\n\n        tags = [\"train_loss\", \"train_acc\", \"val_loss\", \"val_acc\", \"learning_rate\"]\n        tb_writer.add_scalar(tags[0], train_loss, epoch)\n        tb_writer.add_scalar(tags[1], train_acc, epoch)\n        tb_writer.add_scalar(tags[2], val_loss, epoch)\n        tb_writer.add_scalar(tags[3], val_acc, epoch)\n        tb_writer.add_scalar(tags[4], optimizer.param_groups[0][\"lr\"], epoch)\n\n        torch.save(model.state_dict(), \"./weights/model-{}.pth\".format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--num_classes', type=int, default=5)\n    parser.add_argument('--epochs', type=int, default=10)\n    parser.add_argument('--batch-size', type=int, default=8)\n    parser.add_argument('--lr', type=float, default=0.001)\n    parser.add_argument('--lrf', type=float, default=0.01)\n\n    # 数据集所在根目录\n    # https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n    parser.add_argument('--data-path', type=str,\n                        default=\"/data/flower_photos\")\n    parser.add_argument('--model-name', default='', help='create model name')\n\n    # 预训练权重路径，如果不想载入就设置为空字符\n    parser.add_argument('--weights', type=str, 
default='./vit_base_patch16_224_in21k.pth',\n                        help='initial weights path')\n    # 是否冻结权重\n    parser.add_argument('--freeze-layers', type=bool, default=True)\n    parser.add_argument('--device', default='cuda:0', help='device id (e.g. cuda:0 or cpu)')\n\n    opt = parser.parse_args()\n\n    main(opt)\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/utils.py",
    "content": "import os\nimport sys\nimport json\nimport pickle\nimport random\n\nimport torch\nfrom tqdm import tqdm\n\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机结果可复现\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证各平台顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".png\", \".PNG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 排序，保证各平台顺序一致\n        images.sort()\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the 
dataset.\".format(sum(every_class_num)))\n    print(\"{} images for training.\".format(len(train_images_path)))\n    print(\"{} images for validation.\".format(len(val_images_path)))\n    assert len(train_images_path) > 0, \"number of training images must greater than 0.\"\n    assert len(val_images_path) > 0, \"number of validation images must greater than 0.\"\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef plot_data_loader_image(data_loader):\n    batch_size = data_loader.batch_size\n    plot_num = min(batch_size, 4)\n\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), json_path + \" does not exist.\"\n    json_file = open(json_path, 'r')\n    class_indices = json.load(json_file)\n\n    for data in data_loader:\n        images, labels = data\n        for i in range(plot_num):\n            # [C, H, W] -> [H, W, C]\n            img = images[i].numpy().transpose(1, 2, 0)\n            # 反Normalize操作\n            img = (img * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) * 255\n            label = labels[i].item()\n            plt.subplot(1, plot_num, i+1)\n            plt.xlabel(class_indices[str(label)])\n            plt.xticks([])  # 去掉x轴的刻度\n            plt.yticks([])  # 去掉y轴的刻度\n            plt.imshow(img.astype('uint8'))\n        plt.show()\n\n\ndef write_pickle(list_info: list, file_name: str):\n    with open(file_name, 'wb') 
as f:\n        pickle.dump(list_info, f)\n\n\ndef read_pickle(file_name: str) -> list:\n    with open(file_name, 'rb') as f:\n        info_list = pickle.load(f)\n        return info_list\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch):\n    model.train()\n    loss_function = torch.nn.CrossEntropyLoss()\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    optimizer.zero_grad()\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        loss.backward()\n        accu_loss += loss.detach()\n\n        data_loader.desc = \"[train epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n        if not torch.isfinite(loss):\n            print('WARNING: non-finite loss, ending training ', loss)\n            sys.exit(1)\n\n        optimizer.step()\n        optimizer.zero_grad()\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, epoch):\n    loss_function = torch.nn.CrossEntropyLoss()\n\n    model.eval()\n\n    accu_num = torch.zeros(1).to(device)   # 累计预测正确的样本数\n    accu_loss = torch.zeros(1).to(device)  # 累计损失\n\n    sample_num = 0\n    data_loader = tqdm(data_loader, file=sys.stdout)\n    for step, data in enumerate(data_loader):\n        images, labels = data\n        sample_num += images.shape[0]\n\n        pred = 
model(images.to(device))\n        pred_classes = torch.max(pred, dim=1)[1]\n        accu_num += torch.eq(pred_classes, labels.to(device)).sum()\n\n        loss = loss_function(pred, labels.to(device))\n        accu_loss += loss\n\n        data_loader.desc = \"[valid epoch {}] loss: {:.3f}, acc: {:.3f}\".format(epoch,\n                                                                               accu_loss.item() / (step + 1),\n                                                                               accu_num.item() / sample_num)\n\n    return accu_loss.item() / (step + 1), accu_num.item() / sample_num\n"
  },
  {
    "path": "pytorch_classification/vision_transformer/vit_model.py",
    "content": "\"\"\"\noriginal code from rwightman:\nhttps://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py\n\"\"\"\nfrom functools import partial\nfrom collections import OrderedDict\n\nimport torch\nimport torch.nn as nn\n\n\ndef drop_path(x, drop_prob: float = 0., training: bool = False):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).\n    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,\n    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...\n    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for\n    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use\n    'survival rate' as the argument.\n    \"\"\"\n    if drop_prob == 0. or not training:\n        return x\n    keep_prob = 1 - drop_prob\n    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets\n    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)\n    random_tensor.floor_()  # binarize\n    output = x.div(keep_prob) * random_tensor\n    return output\n\n\nclass DropPath(nn.Module):\n    \"\"\"\n    Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).\n    \"\"\"\n    def __init__(self, drop_prob=None):\n        super(DropPath, self).__init__()\n        self.drop_prob = drop_prob\n\n    def forward(self, x):\n        return drop_path(x, self.drop_prob, self.training)\n\n\nclass PatchEmbed(nn.Module):\n    \"\"\"\n    2D Image to Patch Embedding\n    \"\"\"\n    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):\n        super().__init__()\n        img_size = (img_size, img_size)\n        patch_size = (patch_size, patch_size)\n        self.img_size = img_size\n        
self.patch_size = patch_size\n        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])\n        self.num_patches = self.grid_size[0] * self.grid_size[1]\n\n        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)\n        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()\n\n    def forward(self, x):\n        B, C, H, W = x.shape\n        assert H == self.img_size[0] and W == self.img_size[1], \\\n            f\"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]}).\"\n\n        # flatten: [B, C, H, W] -> [B, C, HW]\n        # transpose: [B, C, HW] -> [B, HW, C]\n        x = self.proj(x).flatten(2).transpose(1, 2)\n        x = self.norm(x)\n        return x\n\n\nclass Attention(nn.Module):\n    def __init__(self,\n                 dim,   # 输入token的dim\n                 num_heads=8,\n                 qkv_bias=False,\n                 qk_scale=None,\n                 attn_drop_ratio=0.,\n                 proj_drop_ratio=0.):\n        super(Attention, self).__init__()\n        self.num_heads = num_heads\n        head_dim = dim // num_heads\n        self.scale = qk_scale or head_dim ** -0.5\n        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop_ratio)\n        self.proj = nn.Linear(dim, dim)\n        self.proj_drop = nn.Dropout(proj_drop_ratio)\n\n    def forward(self, x):\n        # [batch_size, num_patches + 1, total_embed_dim]\n        B, N, C = x.shape\n\n        # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]\n        # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]\n        # permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)\n        # [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        q, k, v = qkv[0], qkv[1], 
qkv[2]  # make torchscript happy (cannot use tensor as tuple)\n\n        # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]\n        # @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]\n        attn = (q @ k.transpose(-2, -1)) * self.scale\n        attn = attn.softmax(dim=-1)\n        attn = self.attn_drop(attn)\n\n        # @: multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\n        # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]\n        # reshape: -> [batch_size, num_patches + 1, total_embed_dim]\n        x = (attn @ v).transpose(1, 2).reshape(B, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n\nclass Mlp(nn.Module):\n    \"\"\"\n    MLP as used in Vision Transformer, MLP-Mixer and related networks\n    \"\"\"\n    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):\n        super().__init__()\n        out_features = out_features or in_features\n        hidden_features = hidden_features or in_features\n        self.fc1 = nn.Linear(in_features, hidden_features)\n        self.act = act_layer()\n        self.fc2 = nn.Linear(hidden_features, out_features)\n        self.drop = nn.Dropout(drop)\n\n    def forward(self, x):\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop(x)\n        x = self.fc2(x)\n        x = self.drop(x)\n        return x\n\n\nclass Block(nn.Module):\n    def __init__(self,\n                 dim,\n                 num_heads,\n                 mlp_ratio=4.,\n                 qkv_bias=False,\n                 qk_scale=None,\n                 drop_ratio=0.,\n                 attn_drop_ratio=0.,\n                 drop_path_ratio=0.,\n                 act_layer=nn.GELU,\n                 norm_layer=nn.LayerNorm):\n        super(Block, self).__init__()\n        self.norm1 = norm_layer(dim)\n        self.attn = Attention(dim, num_heads=num_heads, 
qkv_bias=qkv_bias, qk_scale=qk_scale,\n                              attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio)\n        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here\n        self.drop_path = DropPath(drop_path_ratio) if drop_path_ratio > 0. else nn.Identity()\n        self.norm2 = norm_layer(dim)\n        mlp_hidden_dim = int(dim * mlp_ratio)\n        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop_ratio)\n\n    def forward(self, x):\n        x = x + self.drop_path(self.attn(self.norm1(x)))\n        x = x + self.drop_path(self.mlp(self.norm2(x)))\n        return x\n\n\nclass VisionTransformer(nn.Module):\n    def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,\n                 embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,\n                 qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,\n                 attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,\n                 act_layer=None):\n        \"\"\"\n        Args:\n            img_size (int, tuple): input image size\n            patch_size (int, tuple): patch size\n            in_c (int): number of input channels\n            num_classes (int): number of classes for classification head\n            embed_dim (int): embedding dimension\n            depth (int): depth of transformer\n            num_heads (int): number of attention heads\n            mlp_ratio (int): ratio of mlp hidden dim to embedding dim\n            qkv_bias (bool): enable bias for qkv if True\n            qk_scale (float): override default qk scale of head_dim ** -0.5 if set\n            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set\n            distilled (bool): model includes a distillation token and head as in DeiT models\n            drop_ratio (float): dropout rate\n   
         attn_drop_ratio (float): attention dropout rate\n            drop_path_ratio (float): stochastic depth rate\n            embed_layer (nn.Module): patch embedding layer\n            norm_layer: (nn.Module): normalization layer\n        \"\"\"\n        super(VisionTransformer, self).__init__()\n        self.num_classes = num_classes\n        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models\n        self.num_tokens = 2 if distilled else 1\n        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)\n        act_layer = act_layer or nn.GELU\n\n        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)\n        num_patches = self.patch_embed.num_patches\n\n        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))\n        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None\n        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))\n        self.pos_drop = nn.Dropout(p=drop_ratio)\n\n        dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)]  # stochastic depth decay rule\n        self.blocks = nn.Sequential(*[\n            Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,\n                  drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i],\n                  norm_layer=norm_layer, act_layer=act_layer)\n            for i in range(depth)\n        ])\n        self.norm = norm_layer(embed_dim)\n\n        # Representation layer\n        if representation_size and not distilled:\n            self.has_logits = True\n            self.num_features = representation_size\n            self.pre_logits = nn.Sequential(OrderedDict([\n                (\"fc\", nn.Linear(embed_dim, representation_size)),\n                (\"act\", nn.Tanh())\n            ]))\n        else:\n            
self.has_logits = False\n            self.pre_logits = nn.Identity()\n\n        # Classifier head(s)\n        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()\n        self.head_dist = None\n        if distilled:\n            self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()\n\n        # Weight init\n        nn.init.trunc_normal_(self.pos_embed, std=0.02)\n        if self.dist_token is not None:\n            nn.init.trunc_normal_(self.dist_token, std=0.02)\n\n        nn.init.trunc_normal_(self.cls_token, std=0.02)\n        self.apply(_init_vit_weights)\n\n    def forward_features(self, x):\n        # [B, C, H, W] -> [B, num_patches, embed_dim]\n        x = self.patch_embed(x)  # [B, 196, 768]\n        # [1, 1, 768] -> [B, 1, 768]\n        cls_token = self.cls_token.expand(x.shape[0], -1, -1)\n        if self.dist_token is None:\n            x = torch.cat((cls_token, x), dim=1)  # [B, 197, 768]\n        else:\n            x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)\n\n        x = self.pos_drop(x + self.pos_embed)\n        x = self.blocks(x)\n        x = self.norm(x)\n        if self.dist_token is None:\n            return self.pre_logits(x[:, 0])\n        else:\n            return x[:, 0], x[:, 1]\n\n    def forward(self, x):\n        x = self.forward_features(x)\n        if self.head_dist is not None:\n            x, x_dist = self.head(x[0]), self.head_dist(x[1])\n            if self.training and not torch.jit.is_scripting():\n                # during inference, return the average of both classifier predictions\n                return x, x_dist\n            else:\n                return (x + x_dist) / 2\n        else:\n            x = self.head(x)\n        return x\n\n\ndef _init_vit_weights(m):\n    \"\"\"\n    ViT weight initialization\n    :param m: module\n    \"\"\"\n    if isinstance(m, nn.Linear):\n        
nn.init.trunc_normal_(m.weight, std=.01)\n        if m.bias is not None:\n            nn.init.zeros_(m.bias)\n    elif isinstance(m, nn.Conv2d):\n        nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n        if m.bias is not None:\n            nn.init.zeros_(m.bias)\n    elif isinstance(m, nn.LayerNorm):\n        nn.init.zeros_(m.bias)\n        nn.init.ones_(m.weight)\n\n\ndef vit_base_patch16_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  密码: eu9f\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=768 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef 
vit_base_patch32_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1hCv0U8pQomwAtHBYc4hmZg  密码: s5hl\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=768,\n                              depth=12,\n                              num_heads=12,\n                              representation_size=768 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch16_224(num_classes: int = 1000):\n    \"\"\"\n    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    链接: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  密码: qqt8\n    \"\"\"\n    model = 
VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=16,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=1024 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n\n\ndef vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    weights ported from official Google JAX impl:\n    https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=32,\n                              embed_dim=1024,\n                              depth=24,\n                              num_heads=16,\n                              representation_size=1024 if has_logits else None,\n               
               num_classes=num_classes)\n    return model\n\n\ndef vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):\n    \"\"\"\n    ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\n    NOTE: converted weights not currently available, too large for github release hosting.\n    \"\"\"\n    model = VisionTransformer(img_size=224,\n                              patch_size=14,\n                              embed_dim=1280,\n                              depth=32,\n                              num_heads=16,\n                              representation_size=1280 if has_logits else None,\n                              num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/README.md",
    "content": "# DeepPose\n## 对应论文\n论文名称：`DeepPose: Human Pose Estimation via Deep Neural Networks`  \n论文arxiv链接：[https://arxiv.org/abs/1312.4659](https://arxiv.org/abs/1312.4659)\n\n## 开发环境\n开发环境主要信息如下，其他Python依赖详情可见`requirements.txt`文件\n- Python3.10\n- torch2.0.1+cu118（建议大于等于此版本）\n- torchvision0.15.2+cu118（建议大于等于此版本）\n\n## 训练数据集准备\n该项目采用的训练数据是WFLW数据集（人脸98点检测），官方链接：[https://wywu.github.io/projects/LAB/WFLW.html](https://wywu.github.io/projects/LAB/WFLW.html)\n\n在官方网页下载数据集后解压并组织成如下目录形式：\n```\nWFLW\n ├── WFLW_annotations\n │   ├── list_98pt_rect_attr_train_test\n │   └── list_98pt_test\n └── WFLW_images\n     ├── 0--Parade\n     ├── 1--Handshaking\n     ├── 10--People_Marching\n     ├── 11--Meeting\n     ├── 12--Group\n     └── ......\n```\n\n## 预训练权重准备\n由于该项目默认使用的backbone是torchvision中的resnet50，在实例化模型时会自动下载在imagenet上的预训练权重。\n- 若训练环境可正常联网，则会自动下载预训练权重\n- 若训练环境无法正常连接网络，可预先在联网的机器上手动下载，下载链接：[https://download.pytorch.org/models/resnet50-11ad3fa6.pth](https://download.pytorch.org/models/resnet50-11ad3fa6.pth) 下载完成后将权重拷贝至训练服务器的`~/.cache/torch/hub/checkpoints`目录下即可\n\n## 启动训练\n将训练脚本中的`--dataset_dir`设置成自己构建的`WFLW`数据集绝对路径，例如`/home/wz/datasets/WFLW`\n### 单卡训练\n使用`train.py`脚本：\n```bash\npython train.py\n```\n### 多卡训练\n使用`train_multi_GPU.py`脚本：\n```bash\ntorchrun --nproc_per_node=8 train_multi_GPU.py\n```\n若要单独指定使用某些卡可在启动指令前加入`CUDA_VISIBLE_DEVICES`参数，例如：\n```bash\nCUDA_VISIBLE_DEVICES=4,5,6,7 torchrun --nproc_per_node=4 train_multi_GPU.py\n```\n\n## 训练好的权重下载地址\n若没有训练条件或者只想简单体验下，可使用本人训练好的模型权重（包含optimizer等信息故文件会略大），该权重在WFLW验证集上的NME指标为`0.048`，百度网盘下载地址：[https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw](https://pan.baidu.com/s/1L_zg-fmocEyzhSTxj8IDJw)  \n提取码：8fux\n\n下载完成后在当前项目下创建一个`weights`文件夹，并将权重放置在该文件夹内。\n\n## 测试图片\n可参考`predict.py`文件，将`img_path`设置成自己要预测的人脸图片（注意这里只支持单人脸的关键点检测，故需要提供单独的人脸图片，具体使用时可配合一个人脸检测器联合使用），例如输入图片：\n\n![test.jpg](./test_img.jpg)\n\n网络预测可视化结果为：\n\n![predict.jpg](./predict.jpg)\n\n## 导出ONNX模型（可选）\n若需要导出ONNX模型可使用`export_onnx.py`脚本。"
  },
  {
    "path": "pytorch_keypoint/DeepPose/datasets.py",
    "content": "import os\nfrom typing import List, Tuple\n\nimport cv2\nimport torch\nimport torch.utils.data as data\nimport numpy as np\n\n\nclass WFLWDataset(data.Dataset):\n    \"\"\"\n    https://wywu.github.io/projects/LAB/WFLW.html\n\n    dataset structure:\n\n    ├── WFLW_annotations\n    │   ├── list_98pt_rect_attr_train_test\n    │   └── list_98pt_test\n    └── WFLW_images\n        ├── 0--Parade\n        ├── 1--Handshaking\n        ├── 10--People_Marching\n        ├── 11--Meeting\n        ├── 12--Group\n        └── ......\n    \"\"\"\n    def __init__(self,\n                 root: str,\n                 train: bool = True,\n                 transforms=None):\n        super().__init__()\n        self.img_root = os.path.join(root, \"WFLW_images\")\n        assert os.path.exists(self.img_root), \"path '{}' does not exist.\".format(self.img_root)\n        ana_txt_name = \"list_98pt_rect_attr_train.txt\" if train else \"list_98pt_rect_attr_test.txt\"\n        self.anno_path = os.path.join(root, \"WFLW_annotations\", \"list_98pt_rect_attr_train_test\", ana_txt_name)\n        assert os.path.exists(self.anno_path), \"file '{}' does not exist.\".format(self.anno_path)\n\n        self.transforms = transforms\n        self.keypoints: List[np.ndarray] = []\n        self.face_rects: List[List[int]] = []\n        self.img_paths: List[str] = []\n        with open(self.anno_path, \"rt\") as f:\n            for line in f.readlines():\n                if not line.strip():\n                    continue\n\n                split_list = line.strip().split(\" \")\n                keypoint_ = self.get_98_points(split_list)\n                keypoint = np.array(keypoint_, dtype=np.float32).reshape((-1, 2))\n                face_rect = list(map(int, split_list[196: 196 + 4]))  # xmin, ymin, xmax, ymax\n                img_name = split_list[-1]\n\n                self.keypoints.append(keypoint)\n                self.face_rects.append(face_rect)\n                
self.img_paths.append(os.path.join(self.img_root, img_name))\n\n    @staticmethod\n    def get_5_points(keypoints: List[str]) -> List[float]:\n        five_num = [76, 82, 54, 96, 97]\n        five_keypoint = []\n        for i in five_num:\n            five_keypoint.append(keypoints[i * 2])\n            five_keypoint.append(keypoints[i * 2 + 1])\n        return list(map(float, five_keypoint))\n\n    @staticmethod\n    def get_98_points(keypoints: List[str]) -> List[float]:\n        return list(map(float, keypoints[:196]))\n\n    @staticmethod\n    def collate_fn(batch_infos: List[Tuple[torch.Tensor, dict]]):\n        imgs, ori_keypoints, keypoints, m_invs = [], [], [], []\n        for info in batch_infos:\n            imgs.append(info[0])\n            ori_keypoints.append(info[1][\"ori_keypoint\"])\n            keypoints.append(info[1][\"keypoint\"])\n            m_invs.append(info[1][\"m_inv\"])\n\n        imgs_tensor = torch.stack(imgs)\n        keypoints_tensor = torch.stack(keypoints)\n        ori_keypoints_tensor = torch.stack(ori_keypoints)\n        m_invs_tensor = torch.stack(m_invs)\n\n        targets = {\"ori_keypoints\": ori_keypoints_tensor,\n                   \"keypoints\": keypoints_tensor,\n                   \"m_invs\": m_invs_tensor}\n        return imgs_tensor, targets\n\n    def __getitem__(self, idx: int):\n        img_bgr = cv2.imread(self.img_paths[idx], flags=cv2.IMREAD_COLOR)\n        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)\n\n        target = {\n            \"box\": self.face_rects[idx],\n            \"ori_keypoint\": self.keypoints[idx],\n            \"keypoint\": self.keypoints[idx]\n        }\n\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.keypoints)\n\n\nif __name__ == '__main__':\n    train_dataset = WFLWDataset(\"/home/wz/datasets/WFLW\", train=True)\n    print(len(train_dataset))\n\n    
eval_dataset = WFLWDataset(\"/home/wz/datasets/WFLW\", train=False)\n    print(len(eval_dataset))\n\n    from utils import draw_keypoints\n    img, target = train_dataset[0]\n    keypoint = target[\"keypoint\"]\n    h, w, c = img.shape\n    keypoint[:, 0] /= w\n    keypoint[:, 1] /= h\n    draw_keypoints(img, keypoint, \"test_plot.jpg\", is_rel=True)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/export_onnx.py",
    "content": "import os\nimport torch\nfrom model import create_deep_pose_model\n\n\ndef main():\n    img_hw = [256, 256]\n    num_keypoints = 98\n    weights_path = \"./weights/model_weights_209.pth\"\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    # create model\n    model = create_deep_pose_model(num_keypoints=num_keypoints)\n\n    # load model weights\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location=\"cpu\")[\"model\"])\n    model.to(device)\n\n    model.eval()\n    with torch.inference_mode():\n        x = torch.randn(size=(1, 3, img_hw[0], img_hw[1]), device=device)\n        torch.onnx.export(model=model,\n                          args=(x,),\n                          f=\"deeppose.onnx\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/model.py",
    "content": "import torch\nimport torch.nn as nn\nfrom torchvision.models import resnet50, ResNet50_Weights\n\n\ndef create_deep_pose_model(num_keypoints: int) -> nn.Module:\n    res50 = resnet50(ResNet50_Weights.IMAGENET1K_V2)\n    in_features = res50.fc.in_features\n    res50.fc = nn.Linear(in_features=in_features, out_features=num_keypoints * 2)\n\n    return res50\n\n\nif __name__ == '__main__':\n    torch.manual_seed(1234)\n    model = create_deep_pose_model(98)\n    model.eval()\n    with torch.inference_mode():\n        x = torch.randn(1, 3, 224, 224)\n        res = model(x)\n        print(res.shape)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/predict.py",
    "content": "import os\n\nimport torch\nimport numpy as np\nfrom PIL import Image\n\nimport transforms\nfrom model import create_deep_pose_model\nfrom utils import draw_keypoints\n\n\ndef main():\n    img_hw = [256, 256]\n    num_keypoints = 98\n    img_path = \"./test_img.jpg\"\n    weights_path = \"./weights/model_weights_209.pth\"\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    transform = transforms.Compose([\n        transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),\n        transforms.ToTensor(),\n        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    ])\n\n    # load image\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = np.array(Image.open(img_path))\n    h, w, c = img.shape\n    target = {\"box\": [0, 0, w, h]}\n    img_tensor, target = transform(img, target=target)\n    # expand batch dimension\n    img_tensor = img_tensor.unsqueeze(0)\n\n    # create model\n    model = create_deep_pose_model(num_keypoints=num_keypoints)\n\n    # load model weights\n    assert os.path.exists(weights_path), \"file: '{}' dose not exist.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location=\"cpu\")[\"model\"])\n    model.to(device)\n\n    # prediction\n    model.eval()\n    with torch.inference_mode():\n        with torch.autocast(device_type=device.type):\n            pred = torch.squeeze(model(img_tensor.to(device))).reshape([-1, 2]).cpu().numpy()\n\n        wh_tensor = np.array(img_hw[::-1], dtype=np.float32).reshape([1, 2])\n        pred = pred * wh_tensor  # rel coord to abs coord\n        pred = transforms.affine_points_np(pred, target[\"m_inv\"].numpy())\n        draw_keypoints(img, coordinate=pred, save_path=\"predict.jpg\", radius=2)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/requirements.txt",
    "content": "torch>=2.0.1\ntorchvision>=0.15.2\nopencv-python\ntqdm\ntensorboard"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train.py",
    "content": "import os\n\nimport torch\nimport torch.amp\nfrom torch.utils.data import DataLoader\nfrom torch.utils.tensorboard import SummaryWriter\n\nimport transforms\nfrom model import create_deep_pose_model\nfrom datasets import WFLWDataset\nfrom train_utils.train_eval_utils import train_one_epoch, evaluate\n\n\ndef get_args_parser(add_help=True):\n    import argparse\n\n    parser = argparse.ArgumentParser(description=\"PyTorch DeepPose Training\", add_help=add_help)\n    parser.add_argument(\"--dataset_dir\", type=str, default=\"/home/wz/datasets/WFLW\", help=\"WFLW dataset directory\")\n    parser.add_argument(\"--device\", type=str, default=\"cuda:0\", help=\"training device, e.g. cpu, cuda:0\")\n    parser.add_argument(\"--save_weights_dir\", type=str, default=\"./weights\", help=\"save dir for model weights\")\n    parser.add_argument(\"--save_freq\", type=int, default=10, help=\"save frequency for weights and generated imgs\")\n    parser.add_argument(\"--eval_freq\", type=int, default=5, help=\"evaluate frequency\")\n    parser.add_argument('--img_hw', default=[256, 256], nargs='+', type=int, help='training image size[h, w]')\n    parser.add_argument(\"--epochs\", type=int, default=210, help=\"number of epochs of training\")\n    parser.add_argument(\"--batch_size\", type=int, default=32, help=\"size of the batches\")\n    parser.add_argument(\"--num_workers\", type=int, default=8, help=\"number of workers, default: 8\")\n    parser.add_argument(\"--num_keypoints\", type=int, default=98, help=\"number of keypoints\")\n    parser.add_argument(\"--lr\", type=float, default=5e-4, help=\"Adam: learning rate\")\n    parser.add_argument('--lr_steps', default=[170, 200], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    parser.add_argument(\"--warmup_epoch\", type=int, default=10, help=\"number of warmup epoch for training\")\n    parser.add_argument('--resume', default='', type=str, help='resume from 
checkpoint')\n    parser.add_argument('--test_only', action=\"store_true\", help='Only test the model')\n\n    return parser\n\n\ndef main(args):\n    torch.manual_seed(1234)\n    dataset_dir = args.dataset_dir\n    save_weights_dir = args.save_weights_dir\n    save_freq = args.save_freq\n    eval_freq = args.eval_freq\n    num_keypoints = args.num_keypoints\n    num_workers = args.num_workers\n    epochs = args.epochs\n    bs = args.batch_size\n    start_epoch = 0\n    img_hw = args.img_hw\n    os.makedirs(save_weights_dir, exist_ok=True)\n\n    if \"cuda\" in args.device and not torch.cuda.is_available():\n        device = torch.device(\"cpu\")\n    else:\n        device = torch.device(args.device)\n    print(f\"using device: {device} for training.\")\n\n    # tensorboard writer\n    tb_writer = SummaryWriter()\n\n    # create model\n    model = create_deep_pose_model(num_keypoints)\n    model.to(device)\n\n    # config dataset and dataloader\n    data_transform = {\n        \"train\": transforms.Compose([\n            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),\n            transforms.RandomHorizontalFlip(0.5),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ]),\n        \"val\": transforms.Compose([\n            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ])\n    }\n    train_dataset = WFLWDataset(root=dataset_dir,\n                                train=True,\n                                transforms=data_transform[\"train\"])\n    val_dataset = WFLWDataset(root=dataset_dir,\n                              train=False,\n                              transforms=data_transform[\"val\"])\n\n    train_loader = DataLoader(train_dataset,\n     
                         batch_size=bs,\n                              shuffle=True,\n                              pin_memory=True,\n                              num_workers=num_workers,\n                              collate_fn=WFLWDataset.collate_fn,\n                              persistent_workers=True)\n\n    val_loader = DataLoader(val_dataset,\n                            batch_size=bs,\n                            shuffle=False,\n                            pin_memory=True,\n                            num_workers=num_workers,\n                            collate_fn=WFLWDataset.collate_fn,\n                            persistent_workers=True)\n\n    # define optimizers\n    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)\n\n    # define learning rate scheduler\n    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(\n        optimizer=optimizer,\n        start_factor=0.01,\n        end_factor=1.0,\n        total_iters=len(train_loader) * args.warmup_epoch\n    )\n    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(\n        optimizer=optimizer,\n        milestones=[len(train_loader) * i for i in args.lr_steps],\n        gamma=0.1\n    )\n\n    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])\n\n    if args.resume:\n        assert os.path.exists(args.resume)\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        start_epoch = checkpoint['epoch'] + 1\n        print(\"the training process from epoch{}...\".format(start_epoch))\n\n    if args.test_only:\n        evaluate(model=model,\n                 epoch=start_epoch,\n                 val_loader=val_loader,\n                 device=device,\n                 tb_writer=tb_writer,\n                 
affine_points_torch_func=transforms.affine_points_torch,\n                 num_keypoints=num_keypoints,\n                 img_hw=img_hw)\n        return\n\n    for epoch in range(start_epoch, epochs):\n        # train\n        train_one_epoch(model=model,\n                        epoch=epoch,\n                        train_loader=train_loader,\n                        device=device,\n                        optimizer=optimizer,\n                        lr_scheduler=lr_scheduler,\n                        tb_writer=tb_writer,\n                        num_keypoints=num_keypoints,\n                        img_hw=img_hw)\n\n        # eval\n        if epoch % eval_freq == 0 or epoch == args.epochs - 1:\n            evaluate(model=model,\n                     epoch=epoch,\n                     val_loader=val_loader,\n                     device=device,\n                     tb_writer=tb_writer,\n                     affine_points_torch_func=transforms.affine_points_torch,\n                     num_keypoints=num_keypoints,\n                     img_hw=img_hw)\n\n        # save weights\n        if epoch % save_freq == 0 or epoch == args.epochs - 1:\n            save_files = {\n                'model': model.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': lr_scheduler.state_dict(),\n                'epoch': epoch\n            }\n            torch.save(save_files, os.path.join(save_weights_dir, f\"model_weights_{epoch}.pth\"))\n\n\nif __name__ == '__main__':\n    args = get_args_parser().parse_args()\n    main(args)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train_multi_GPU.py",
    "content": "import os\n\nimport torch\nimport torch.amp\nfrom torch.utils.data import DataLoader, DistributedSampler, BatchSampler\nfrom torch.utils.tensorboard import SummaryWriter\n\nimport transforms\nfrom model import create_deep_pose_model\nfrom datasets import WFLWDataset\nfrom train_utils.train_eval_utils import train_one_epoch, evaluate\nfrom train_utils.distributed_utils import init_distributed_mode, is_main_process\n\n\ndef get_args_parser(add_help=True):\n    import argparse\n\n    parser = argparse.ArgumentParser(description=\"PyTorch DeepPose Training\", add_help=add_help)\n    parser.add_argument(\"--dataset_dir\", type=str, default=\"/home/wz/datasets/WFLW\", help=\"WFLW dataset directory\")\n    parser.add_argument(\"--device\", type=str, default=\"cuda\", help=\"training device, e.g. cpu, cuda\")\n    parser.add_argument(\"--save_weights_dir\", type=str, default=\"./weights\", help=\"save dir for model weights\")\n    parser.add_argument(\"--save_freq\", type=int, default=5, help=\"save frequency for weights and generated imgs\")\n    parser.add_argument(\"--eval_freq\", type=int, default=5, help=\"evaluate frequency\")\n    parser.add_argument('--img_hw', default=[256, 256], nargs='+', type=int, help='training image size[h, w]')\n    parser.add_argument(\"--epochs\", type=int, default=210, help=\"number of epochs of training\")\n    parser.add_argument(\"--batch_size\", type=int, default=32, help=\"size of the batches\")\n    parser.add_argument(\"--num_workers\", type=int, default=8, help=\"number of workers, default: 8\")\n    parser.add_argument(\"--num_keypoints\", type=int, default=98, help=\"number of keypoints\")\n    parser.add_argument(\"--lr\", type=float, default=5e-4, help=\"Adam: learning rate\")\n    parser.add_argument('--lr_steps', default=[170, 200], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    parser.add_argument(\"--warmup_epoch\", type=int, default=10, help=\"number of warmup 
epoch for training\")\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    parser.add_argument('--test_only', action=\"store_true\", help='Only test the model')\n\n    return parser\n\n\ndef main(args):\n    torch.manual_seed(1234)\n    init_distributed_mode(args)\n    if not args.distributed:\n        raise EnvironmentError(\"not support distributed training.\")\n\n    dataset_dir = args.dataset_dir\n    save_weights_dir = args.save_weights_dir\n    save_freq = args.save_freq\n    eval_freq = args.eval_freq\n    num_keypoints = args.num_keypoints\n    num_workers = args.num_workers\n    epochs = args.epochs\n    bs = args.batch_size\n    start_epoch = 0\n    img_hw = args.img_hw\n    device = torch.device(args.device)\n    os.makedirs(save_weights_dir, exist_ok=True)\n\n    # adjust learning rate\n    args.lr = args.lr * args.world_size\n\n    tb_writer = None\n    if is_main_process():\n        # tensorboard writer\n        tb_writer = SummaryWriter()\n\n    # create model\n    model = create_deep_pose_model(num_keypoints)\n    model.to(device)\n    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n\n    # config dataset and dataloader\n    data_transform = {\n        \"train\": transforms.Compose([\n            transforms.AffineTransform(scale_factor=(0.65, 1.35), rotate=45, shift_factor=0.15, fixed_size=img_hw),\n            transforms.RandomHorizontalFlip(0.5),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ]),\n        \"val\": transforms.Compose([\n            transforms.AffineTransform(scale_prob=0., rotate_prob=0., shift_prob=0., fixed_size=img_hw),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ])\n    }\n    
train_dataset = WFLWDataset(root=dataset_dir,\n                                train=True,\n                                transforms=data_transform[\"train\"])\n    val_dataset = WFLWDataset(root=dataset_dir,\n                              train=False,\n                              transforms=data_transform[\"val\"])\n\n    train_sampler = DistributedSampler(train_dataset)\n    val_sampler = DistributedSampler(val_dataset)\n    train_batch_sampler = BatchSampler(train_sampler, args.batch_size, drop_last=True)\n\n    train_loader = DataLoader(train_dataset,\n                              batch_sampler=train_batch_sampler,\n                              pin_memory=True,\n                              num_workers=num_workers,\n                              collate_fn=WFLWDataset.collate_fn,\n                              persistent_workers=True)\n\n    val_loader = DataLoader(val_dataset,\n                            batch_size=bs,\n                            sampler=val_sampler,\n                            shuffle=False,\n                            pin_memory=True,\n                            num_workers=num_workers,\n                            collate_fn=WFLWDataset.collate_fn,\n                            persistent_workers=True)\n\n    # define optimizers\n    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)\n\n    # define learning rate scheduler\n    warmup_scheduler = torch.optim.lr_scheduler.LinearLR(\n        optimizer=optimizer,\n        start_factor=0.01,\n        end_factor=1.0,\n        total_iters=len(train_loader) * args.warmup_epoch\n    )\n    multi_step_scheduler = torch.optim.lr_scheduler.MultiStepLR(\n        optimizer=optimizer,\n        milestones=[len(train_loader) * i for i in args.lr_steps],\n        gamma=0.1\n    )\n\n    lr_scheduler = torch.optim.lr_scheduler.ChainedScheduler([warmup_scheduler, multi_step_scheduler])\n\n    if args.resume:\n        assert os.path.exists(args.resume)\n        checkpoint = 
torch.load(args.resume, map_location='cpu')\n        model.module.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        start_epoch = checkpoint['epoch'] + 1\n        print(\"the training process from epoch{}...\".format(start_epoch))\n\n    if args.test_only:\n        evaluate(model=model,\n                 epoch=start_epoch,\n                 val_loader=val_loader,\n                 device=device,\n                 tb_writer=tb_writer,\n                 affine_points_torch_func=transforms.affine_points_torch,\n                 num_keypoints=num_keypoints,\n                 img_hw=img_hw)\n        return\n\n    for epoch in range(start_epoch, epochs):\n        # train\n        train_sampler.set_epoch(epoch)  # shuffle training data\n        train_one_epoch(model=model,\n                        epoch=epoch,\n                        train_loader=train_loader,\n                        device=device,\n                        optimizer=optimizer,\n                        lr_scheduler=lr_scheduler,\n                        tb_writer=tb_writer,\n                        num_keypoints=num_keypoints,\n                        img_hw=img_hw)\n\n        # eval\n        if epoch % eval_freq == 0 or epoch == args.epochs - 1:\n            evaluate(model=model,\n                     epoch=epoch,\n                     val_loader=val_loader,\n                     device=device,\n                     tb_writer=tb_writer,\n                     affine_points_torch_func=transforms.affine_points_torch,\n                     num_keypoints=num_keypoints,\n                     img_hw=img_hw)\n\n        # save weights\n        if is_main_process() and (epoch % save_freq == 0 or epoch == args.epochs - 1):\n            save_files = {\n                'model': model.module.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': 
lr_scheduler.state_dict(),\n                'epoch': epoch\n            }\n            torch.save(save_files, os.path.join(save_weights_dir, f\"model_weights_{epoch}.pth\"))\n\n\nif __name__ == '__main__':\n    args = get_args_parser().parse_args()\n    main(args)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train_utils/distributed_utils.py",
    "content": "import os\n\nimport torch\nimport torch.distributed as dist\n\n\ndef reduce_value(input_value: torch.Tensor, average=True) -> torch.Tensor:\n    \"\"\"\n    Args:\n        input_value (Tensor): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values from all processes so that all processes\n    have the averaged results.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_value\n\n    with torch.inference_mode():  # 多GPU的情况\n        dist.all_reduce(input_value)\n        if average:\n            input_value /= world_size\n\n        return input_value\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef init_distributed_mode(args):\n    if not torch.cuda.is_available():\n        print('No available device')\n        args.distributed = False\n        return\n\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = 
int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print(f'| distributed init (rank {args.rank}): {args.dist_url}', flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend,\n                                         init_method=args.dist_url,\n                                         world_size=args.world_size,\n                                         rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train_utils/losses.py",
    "content": "import math\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass L1Loss(nn.Module):\n    def __init__(self) -> None:\n        super().__init__()\n\n    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch = None) -> torch.Tensor:\n        \"\"\"\n        Args:\n            pred [N, K, 2]\n            label [N, K, 2]\n            mask [N, K]\n        \"\"\"\n        losses = F.l1_loss(pred, label, reduction=\"none\")\n        if mask is not None:\n            # filter invalid keypoints(e.g. out of range)\n            losses = losses * mask.unsqueeze(2)\n\n        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)\n\n\nclass SmoothL1Loss(nn.Module):\n    def __init__(self) -> None:\n        super().__init__()\n\n    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch = None) -> torch.Tensor:\n        \"\"\"\n        Args:\n            pred [N, K, 2]\n            label [N, K, 2]\n            mask [N, K]\n        \"\"\"\n        losses = F.smooth_l1_loss(pred, label, reduction=\"none\")\n        if mask is not None:\n            # filter invalid keypoints(e.g. out of range)\n            losses = losses * mask.unsqueeze(2)\n\n        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)\n\n\nclass L2Loss(nn.Module):\n    def __init__(self) -> None:\n        super().__init__()\n\n    def forward(self, pred: torch.Tensor, label: torch.Tensor, mask: torch = None) -> torch.Tensor:\n        \"\"\"\n        Args:\n            pred [N, K, 2]\n            label [N, K, 2]\n            mask [N, K]\n        \"\"\"\n        losses = F.mse_loss(pred, label, reduction=\"none\")\n        if mask is not None:\n            # filter invalid keypoints(e.g. 
out of range)\n            losses = losses * mask.unsqueeze(2)\n\n        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)\n\n\nclass WingLoss(nn.Module):\n    \"\"\"refer https://github.com/TropComplique/wing-loss/blob/master/loss.py\n    \"\"\"\n    def __init__(self, w: float = 10.0, epsilon: float = 2.0) -> None:\n        super().__init__()\n        self.w = w\n        self.epsilon = epsilon\n        self.C = w * (1.0 - math.log(1.0 + w / epsilon))\n\n    def forward(self,\n                pred: torch.Tensor,\n                label: torch.Tensor,\n                wh_tensor: torch.Tensor,\n                mask: torch = None) -> torch.Tensor:\n        \"\"\"\n        Args:\n            pred [N, K, 2]\n            wh_tensor [1, 1, 2]\n            label [N, K, 2]\n            mask [N, K]\n        \"\"\"\n        delta = (pred - label).abs() * wh_tensor  # rel to abs\n        losses = torch.where(condition=self.w > delta,\n                             input=self.w * torch.log(1.0 + delta / self.epsilon),\n                             other=delta - self.C)\n        if mask is not None:\n            # filter invalid keypoints(e.g. 
out of range)\n            losses = losses * mask.unsqueeze(2)\n\n        return torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)\n\n\nclass SoftWingLoss(nn.Module):\n    \"\"\"refer mmpose/models/losses/regression_loss.py\n    \"\"\"\n    def __init__(self, omega1: float = 2.0, omega2: float = 20.0, epsilon: float = 0.5) -> None:\n        super().__init__()\n        self.omega1 = omega1\n        self.omega2 = omega2\n        self.epsilon = epsilon\n        self.B = omega1 - omega2 * math.log(1.0 + omega1 / epsilon)\n\n    def forward(self,\n                pred: torch.Tensor,\n                label: torch.Tensor,\n                wh_tensor: torch.Tensor,\n                mask: torch = None) -> torch.Tensor:\n        \"\"\"\n        Args:\n            pred [N, K, 2]\n            label [N, K, 2]\n            wh_tensor [1, 1, 2]\n            mask [N, K]\n        \"\"\"\n        delta = (pred - label).abs() * wh_tensor  # rel to abs\n        losses = torch.where(condition=delta < self.omega1,\n                             input=delta,\n                             other=self.omega2 * torch.log(1.0 + delta / self.epsilon) + self.B)\n        if mask is not None:\n            # filter invalid keypoints(e.g. out of range)\n            losses = losses * mask.unsqueeze(2)\n\n        loss = torch.mean(torch.sum(losses, dim=(1, 2)), dim=0)\n        return loss\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train_utils/metrics.py",
    "content": "import torch\n\nfrom .distributed_utils import reduce_value, is_dist_avail_and_initialized\n\n\nclass NMEMetric:\n    def __init__(self, device: torch.device) -> None:\n        # 两眼外角点对应keypoint索引\n        self.keypoint_idxs = [60, 72]\n        self.nme_accumulator: float = 0.\n        self.counter: float = 0.\n        self.device = device\n\n    def update(self, pred: torch.Tensor, gt: torch.Tensor, mask: torch.Tensor = None):\n        \"\"\"\n        Args:\n            pred (shape [N, K, 2]): pred keypoints\n            gt (shape [N, K, 2]): gt keypoints\n            mask (shape [N, K]): valid keypoints mask\n        \"\"\"\n        # ion: inter-ocular distance normalized error\n        ion = torch.linalg.norm(gt[:, self.keypoint_idxs[0]] - gt[:, self.keypoint_idxs[1]], dim=1)\n\n        valid_ion_mask = ion > 0\n        if mask is None:\n            mask = valid_ion_mask\n        else:\n            mask = torch.logical_and(mask, valid_ion_mask.unsqueeze_(dim=1)).sum(dim=1) > 0\n        num_valid = mask.sum().item()\n\n        # equal: (pred - gt).pow(2).sum(dim=2).pow(0.5).mean(dim=1)\n        l2_dis = torch.linalg.norm(pred - gt, dim=2)[mask].mean(dim=1)  # [N]\n\n        # avoid divide by zero\n        ion = ion[mask]  # [N]\n\n        self.nme_accumulator += l2_dis.div(ion).sum().item()\n        self.counter += num_valid\n\n    def evaluate(self):\n        return self.nme_accumulator / self.counter\n\n    def synchronize_results(self):\n        if is_dist_avail_and_initialized():\n            self.nme_accumulator = reduce_value(\n                torch.as_tensor(self.nme_accumulator, device=self.device),\n                average=False\n            ).item()\n\n            self.counter = reduce_value(\n                torch.as_tensor(self.counter, device=self.device),\n                average=False\n            )\n\n\nif __name__ == '__main__':\n    metric = NMEMetric()\n    metric.update(pred=torch.randn(32, 98, 2),\n                  
gt=torch.randn(32, 98, 2),\n                  mask=torch.randn(32, 98))\n    print(metric.evaluate())\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/train_utils/train_eval_utils.py",
    "content": "import sys\nimport math\nfrom typing import Callable, List\n\nfrom tqdm import tqdm\nimport torch\nfrom torch.utils.data import DataLoader\nfrom torch.utils.tensorboard import SummaryWriter\n\nfrom .losses import WingLoss\nfrom .metrics import NMEMetric\nfrom .distributed_utils import is_main_process, reduce_value\n\n\ndef train_one_epoch(model: torch.nn.Module,\n                    epoch: int,\n                    train_loader: DataLoader,\n                    device: torch.device,\n                    optimizer: torch.optim.Optimizer,\n                    lr_scheduler: torch.optim.lr_scheduler.LRScheduler,\n                    tb_writer: SummaryWriter,\n                    num_keypoints: int,\n                    img_hw: List[int]) -> None:\n    # define loss function\n    loss_func = WingLoss()\n    wh_tensor = torch.as_tensor(img_hw[::-1], dtype=torch.float32, device=device).reshape([1, 1, 2])\n\n    model.train()\n    train_bar = train_loader\n    if is_main_process():\n        train_bar = tqdm(train_loader, file=sys.stdout)\n\n    for step, (imgs, targets) in enumerate(train_bar):\n        imgs = imgs.to(device)\n        labels = targets[\"keypoints\"].to(device)\n\n        optimizer.zero_grad()\n        # use mixed precision to speed up training\n        with torch.autocast(device_type=device.type):\n            pred: torch.Tensor = model(imgs)\n            loss: torch.Tensor = loss_func(pred.reshape((-1, num_keypoints, 2)), labels, wh_tensor)\n\n        loss_value = reduce_value(loss).item()\n        if not math.isfinite(loss_value):\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            sys.exit(1)\n\n        loss.backward()\n        optimizer.step()\n        lr_scheduler.step()\n\n        if is_main_process():\n            train_bar.desc = f\"train epoch[{epoch}] loss:{loss_value:.3f}\"\n\n            global_step = epoch * len(train_loader) + step\n            tb_writer.add_scalar(\"train loss\", loss.item(), 
global_step=global_step)\n            tb_writer.add_scalar(\"learning rate\", optimizer.param_groups[0][\"lr\"], global_step=global_step)\n\n\n@torch.inference_mode()\ndef evaluate(model: torch.nn.Module,\n             epoch: int,\n             val_loader: DataLoader,\n             device: torch.device,\n             tb_writer: SummaryWriter,\n             affine_points_torch_func: Callable,\n             num_keypoints: int,\n             img_hw: List[int]) -> None:\n    model.eval()\n    metric = NMEMetric(device=device)\n    wh_tensor = torch.as_tensor(img_hw[::-1], dtype=torch.float32, device=device).reshape([1, 1, 2])\n    eval_bar = val_loader\n    if is_main_process():\n        eval_bar = tqdm(val_loader, file=sys.stdout, desc=\"evaluation\")\n\n    for step, (imgs, targets) in enumerate(eval_bar):\n        imgs = imgs.to(device)\n        m_invs = targets[\"m_invs\"].to(device)\n        labels = targets[\"ori_keypoints\"].to(device)\n\n        pred = model(imgs)\n        pred = pred.reshape((-1, num_keypoints, 2))  # [N, K, 2]\n        pred = pred * wh_tensor  # rel coord to abs coord\n        pred = affine_points_torch_func(pred, m_invs)\n\n        metric.update(pred, labels)\n\n    metric.synchronize_results()\n    if is_main_process():\n        nme = metric.evaluate()\n        tb_writer.add_scalar(\"evaluation nme\", nme, global_step=epoch)\n        print(f\"evaluation NME[{epoch}]: {nme:.3f}\")\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/transforms.py",
    "content": "import math\nimport random\nfrom typing import Tuple\n\nimport cv2\nimport torch\nimport numpy as np\n\nfrom wflw_horizontal_flip_indices import wflw_flip_indices_dict\n\n\ndef adjust_box(xmin: int, ymin: int, xmax: int, ymax: int, fixed_size: Tuple[int, int]):\n    \"\"\"通过增加w或者h的方式保证输入图片的长宽比固定\"\"\"\n    w = xmax - xmin\n    h = ymax - ymin\n\n    hw_ratio = fixed_size[0] / fixed_size[1]\n    if h / w > hw_ratio:\n        # 需要在w方向padding\n        wi = h / hw_ratio\n        pad_w = (wi - w) / 2\n        xmin = xmin - pad_w\n        xmax = xmax + pad_w\n    else:\n        # 需要在h方向padding\n        hi = w * hw_ratio\n        pad_h = (hi - h) / 2\n        ymin = ymin - pad_h\n        ymax = ymax + pad_h\n\n    return xmin, ymin, xmax, ymax\n\n\ndef affine_points_np(keypoint: np.ndarray, m: np.ndarray) -> np.ndarray:\n    \"\"\"\n    Args:\n        keypoint [k, 2]\n        m [2, 3]\n    \"\"\"\n    ones = np.ones((keypoint.shape[0], 1), dtype=np.float32)\n    keypoint = np.concatenate([keypoint, ones], axis=1)  # [k, 3]\n    new_keypoint = np.matmul(keypoint, m.T)\n    return new_keypoint\n\n\ndef affine_points_torch(keypoint: torch.Tensor, m: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Args:\n        keypoint [n, k, 2]\n        m [n, 2, 3]\n    \"\"\"\n    dtype = keypoint.dtype\n    device = keypoint.device\n\n    n, k, _ = keypoint.shape\n    ones = torch.ones(size=(n, k, 1), dtype=dtype, device=device)\n    keypoint = torch.concat([keypoint, ones], dim=2)  # [n, k, 3]\n    new_keypoint = torch.matmul(keypoint, m.transpose(1, 2))\n    return new_keypoint\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass Resize(object):\n    def __init__(self, h: int, w: int):\n        self.h = h\n        self.w = 
w\n\n    def __call__(self, image: np.ndarray, target):\n        image = cv2.resize(image, dsize=(self.w, self.h), fx=0, fy=0,\n                           interpolation=cv2.INTER_LINEAR)\n\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将opencv图像转为Tensor, HWC2CHW, 并缩放数值至0~1\"\"\"\n    def __call__(self, image, target):\n        image = torch.from_numpy(image).permute((2, 0, 1))\n        image = image.to(torch.float32) / 255.\n\n        if \"ori_keypoint\" in target and \"keypoint\" in target:\n            target[\"ori_keypoint\"] = torch.from_numpy(target[\"ori_keypoint\"])\n            target[\"keypoint\"] = torch.from_numpy(target[\"keypoint\"])\n        target[\"m_inv\"] = torch.from_numpy(target[\"m_inv\"])\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean=None, std=None):\n        self.mean = torch.as_tensor(mean, dtype=torch.float32).reshape((3, 1, 1))\n        self.std = torch.as_tensor(std, dtype=torch.float32).reshape((3, 1, 1))\n\n    def __call__(self, image: torch.Tensor, target: dict):\n        image.sub_(self.mean).div_(self.std)\n\n        if \"keypoint\" in target:\n            _, h, w = image.shape\n            keypoint = target[\"keypoint\"]\n            keypoint[:, 0] /= w\n            keypoint[:, 1] /= h\n            target[\"keypoint\"] = keypoint\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机对输入图片进行水平翻转\"\"\"\n    def __init__(self, p: float = 0.5):\n        self.p = p\n        self.wflw_flip_ids = list(wflw_flip_indices_dict.values())\n\n    def __call__(self, image: np.ndarray, target: dict):\n        if random.random() < self.p:\n            # [h, w, c]\n            image = np.ascontiguousarray(np.flip(image, axis=[1]))\n\n            # [k, 2]\n            if \"keypoint\" in target:\n                _, w, _ = image.shape\n                keypoint: torch.Tensor = target[\"keypoint\"]\n                keypoint = keypoint[self.wflw_flip_ids]\n    
            keypoint[:, 0] = w - keypoint[:, 0]\n                target[\"keypoint\"] = keypoint\n\n        return image, target\n\n\nclass AffineTransform(object):\n    \"\"\"shift+scale+rotation\"\"\"\n    def __init__(self,\n                 scale_factor: Tuple[float, float] = (0.65, 1.35),\n                 scale_prob: float = 1.,\n                 rotate: int = 45,\n                 rotate_prob: float = 0.6,\n                 shift_factor: float = 0.15,\n                 shift_prob: float = 0.3,\n                 fixed_size: Tuple[int, int] = (256, 256)):\n        self.scale_factor = scale_factor\n        self.scale_prob = scale_prob\n        self.rotate = rotate\n        self.rotate_prob = rotate_prob\n        self.shift_factor = shift_factor\n        self.shift_prob = shift_prob\n        self.fixed_size = fixed_size  # (h, w)\n\n    def __call__(self, img: np.ndarray, target: dict):\n        src_xmin, src_ymin, src_xmax, src_ymax = adjust_box(*target[\"box\"], fixed_size=self.fixed_size)\n        src_w = src_xmax - src_xmin\n        src_h = src_ymax - src_ymin\n\n        if random.random() < self.shift_prob:\n            shift_w_factor = random.uniform(-self.shift_factor, self.shift_factor)\n            shift_h_factor = random.uniform(-self.shift_factor, self.shift_factor)\n            src_xmin -= int(src_w * shift_w_factor)\n            src_xmax -= int(src_w * shift_w_factor)\n            src_ymin -= int(src_h * shift_h_factor)\n            src_ymax -= int(src_h * shift_h_factor)\n\n        src_center = np.array([(src_xmin + src_xmax) / 2, (src_ymin + src_ymax) / 2], dtype=np.float32)\n        src_p2 = src_center + np.array([0, -src_h / 2], dtype=np.float32)  # top middle\n        src_p3 = src_center + np.array([src_w / 2, 0], dtype=np.float32)   # right middle\n\n        dst_center = np.array([(self.fixed_size[1] - 1) / 2, (self.fixed_size[0] - 1) / 2], dtype=np.float32)\n        dst_p2 = np.array([(self.fixed_size[1] - 1) / 2, 0], dtype=np.float32)  # top 
middle\n        dst_p3 = np.array([self.fixed_size[1] - 1, (self.fixed_size[0] - 1) / 2], dtype=np.float32)  # right middle\n\n        if random.random() < self.scale_prob:\n            scale = random.uniform(*self.scale_factor)\n            src_w = src_w * scale\n            src_h = src_h * scale\n            src_p2 = src_center + np.array([0, -src_h / 2], dtype=np.float32)  # top middle\n            src_p3 = src_center + np.array([src_w / 2, 0], dtype=np.float32)   # right middle\n\n        if random.random() < self.rotate_prob:\n            angle = random.randint(-self.rotate, self.rotate)  # 角度制\n            angle = angle / 180 * math.pi  # 弧度制\n            src_p2 = src_center + np.array([src_h / 2 * math.sin(angle),\n                                            -src_h / 2 * math.cos(angle)], dtype=np.float32)\n            src_p3 = src_center + np.array([src_w / 2 * math.cos(angle),\n                                            src_w / 2 * math.sin(angle)], dtype=np.float32)\n\n        src = np.stack([src_center, src_p2, src_p3])\n        dst = np.stack([dst_center, dst_p2, dst_p3])\n\n        m = cv2.getAffineTransform(src, dst).astype(np.float32)  # 计算正向仿射变换矩阵\n        m_inv = cv2.getAffineTransform(dst, src).astype(np.float32)  # 计算逆向仿射变换矩阵，方便后续还原\n\n        # 对图像进行仿射变换\n        warp_img = cv2.warpAffine(src=img,\n                                  M=m,\n                                  dsize=tuple(self.fixed_size[::-1]),  # [w, h]\n                                  borderMode=cv2.BORDER_CONSTANT,\n                                  borderValue=(0, 0, 0),\n                                  flags=cv2.INTER_LINEAR)\n\n        if \"keypoint\" in target:\n            keypoint = target[\"keypoint\"]\n            keypoint = affine_points_np(keypoint, m)\n            target[\"keypoint\"] = keypoint\n\n        # from utils import draw_keypoints\n        # keypoint[:, 0] /= self.fixed_size[1]\n        # keypoint[:, 1] /= self.fixed_size[0]\n        # 
draw_keypoints(warp_img, keypoint, \"affine.jpg\", 2, is_rel=True)\n\n        target[\"m\"] = m\n        target[\"m_inv\"] = m_inv\n        return warp_img, target\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/utils.py",
    "content": "import cv2\nimport numpy as np\n\n\ndef draw_keypoints(img: np.ndarray, coordinate: np.ndarray, save_path: str, radius: int = 3, is_rel: bool = False):\n    coordinate_ = coordinate.copy()\n    if is_rel:\n        h, w, c = img.shape\n        coordinate_[:, 0] *= w\n        coordinate_[:, 1] *= h\n    coordinate_ = coordinate_.astype(np.int64).tolist()\n\n    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n    for x, y in coordinate_:\n        cv2.circle(img_bgr, center=(x, y), radius=radius, color=(255, 0, 0), thickness=-1)\n\n    cv2.imwrite(save_path, img_bgr)\n"
  },
  {
    "path": "pytorch_keypoint/DeepPose/wflw_horizontal_flip_indices.py",
    "content": "wflw_flip_indices_dict = {\n    0: 32,\n    1: 31,\n    2: 30,\n    3: 29,\n    4: 28,\n    5: 27,\n    6: 26,\n    7: 25,\n    8: 24,\n    9: 23,\n    10: 22,\n    11: 21,\n    12: 20,\n    13: 19,\n    14: 18,\n    15: 17,\n    16: 16,\n    17: 15,\n    18: 14,\n    19: 13,\n    20: 12,\n    21: 11,\n    22: 10,\n    23: 9,\n    24: 8,\n    25: 7,\n    26: 6,\n    27: 5,\n    28: 4,\n    29: 3,\n    30: 2,\n    31: 1,\n    32: 0,\n    33: 46,\n    34: 45,\n    35: 44,\n    36: 43,\n    37: 42,\n    38: 50,\n    39: 49,\n    40: 48,\n    41: 47,\n    42: 37,\n    43: 36,\n    44: 35,\n    45: 34,\n    46: 33,\n    47: 41,\n    48: 40,\n    49: 39,\n    50: 38,\n    51: 51,\n    52: 52,\n    53: 53,\n    54: 54,\n    55: 59,\n    56: 58,\n    57: 57,\n    58: 56,\n    59: 55,\n    60: 72,\n    61: 71,\n    62: 70,\n    63: 69,\n    64: 68,\n    65: 75,\n    66: 74,\n    67: 73,\n    68: 64,\n    69: 63,\n    70: 62,\n    71: 61,\n    72: 60,\n    73: 67,\n    74: 66,\n    75: 65,\n    76: 82,\n    77: 81,\n    78: 80,\n    79: 79,\n    80: 78,\n    81: 77,\n    82: 76,\n    83: 87,\n    84: 86,\n    85: 85,\n    86: 84,\n    87: 83,\n    88: 92,\n    89: 91,\n    90: 90,\n    91: 89,\n    92: 88,\n    93: 95,\n    94: 94,\n    95: 93,\n    96: 97,\n    97: 96,\n}\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/README.md",
    "content": "# HRNet\n\n## 该项目主要参考以下仓库\n* https://github.com/leoxiaobin/deep-high-resolution-net.pytorch\n* https://github.com/stefanopini/simple-HRNet\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10或以上\n* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n* 详细环境配置见`requirements.txt`\n\n## 文件结构：\n```\n  ├── model: 搭建HRNet相关代码\n  ├── train_utils: 训练验证相关模块（包括coco验证相关）\n  ├── my_dataset_coco.py: 自定义dataset用于读取COCO2017数据集\n  ├── person_keypoints.json: COCO数据集中人体关键点相关信息\n  ├── train.py: 单GPU/CPU训练脚本\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测\n  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n  └── transforms.py: 数据增强相关\n```\n\n## 预训练权重下载地址（下载后放入当前文件夹中）：\n由于原作者提供的预训练权重(Imagenet和COCO)是放在GoogleDrive和OneDrive上的，国内无法正常访问。所有我提前将权重文件全部下载并放在百度网盘中，\n需要的可以自行下载，链接:https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw 提取码:f43o\n\n下载后的目录结构如下：\n```\n├── pytorch\n      ├── pose_mpii\n      ├── pose_coco\n      │     ├── pose_resnet_50_384x288.pth\n      │     ├── pose_resnet_50_256x192.pth\n      │     ├── pose_resnet_101_384x288.pth\n      │     ├── pose_resnet_101_256x192.pth\n      │     ├── pose_hrnet_w32_384x288.pth\n      │     └── pose_hrnet_w32_256x192.pth\n      └── imagenet\n            ├── resnet50-19c8e357.pth\n            ├── resnet152-b121ed2d.pth\n            ├── resnet101-5d3b4d8f.pth\n            └── hrnet_w32-36af842e.pth\n```\n如果要直接使用在COCO数据集上预训练好的权重进行预测，下载pose_coco下的`pose_hrnet_w32_256x192.pth`使用即可。\n如果要从头训练网络，下载imagenet下的`hrnet_w32-36af842e.pth`文件，并重命名为`hrnet_w32.pth`即可。\n\n除此之外，还有一个`person_detection_results`文件，存储的是论文中提到的人体检测器的检测结果，如果需要使用可以下载，但个人建议直接使用COCO val中GT信息即可。\n链接: https://pan.baidu.com/s/19Z4mmNHUD934GQ9QYcF5iw  密码: i08q\n \n## 数据集，本例程使用的是COCO2017数据集\n* COCO官网地址：https://cocodataset.org/\n* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318\n* 这里以下载coco2017数据集为例，主要下载三个文件：\n    * `2017 Train 
images [118K/18GB]`：训练过程中使用到的所有图像文件\n    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件\n    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件\n* 都解压到`coco2017`文件夹下，可得到如下文件夹结构：\n```\n├── coco2017: 数据集根目录\n     ├── train2017: 所有训练图像文件夹(118287张)\n     ├── val2017: 所有验证图像文件夹(5000张)\n     └── annotations: 对应标注文件夹\n              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件\n              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件\n              ├── captions_train2017.json: 对应图像描述的训练集标注文件\n              ├── captions_val2017.json: 对应图像描述的验证集标注文件\n              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件\n              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件\n```\n\n## 训练方法\n* 注：该项目从头训练HRNet在MS COCO2017的val上的mAP[@0.50:0.95]为76.1，利用原作者提供的权重在val上的mAP[@0.50:0.95]为76.6，相差0.5个点，\n暂时没有找到原因。由于训练该网络需要迭代210个epoch(按照论文中的数据)，训练时间很长，建议直接使用原作者提供训练好的权重。另外，在训练过程中发现GPU的利用率\n并不高(在20%~60%之间浮动)，暂时猜测是网络结构的原因。\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 确保设置好`--num-joints`(对于人体检测的关键点个数，COCO是17个点)、`--fixed-size`(输入目标图像的高宽，默认[256, 192])和`--data-path`(指向`coco2017`目录)\n* 若要使用单GPU训练直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`\n\n## 注意事项\n1. 在使用训练脚本时，注意要将`--data-path`设置为自己存放数据集的**根目录**：\n假设要使用COCO数据集，启用自定义数据集读取CocoKeypoint并将数据集解压到/data/coco2017目录下\n```\npython train.py --data-path /data/coco2017\n```\n2. 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前10个值是COCO指标，后面两个值是训练平均损失以及学习率\n3. 在使用预测脚本时，如果要读取自己训练好的权重要将`weights_path`设置为你自己生成的权重路径。\n\n\n## 如果对HRNet网络不是很理解可参考我的bilibili\nhttps://www.bilibili.com/video/BV1bB4y1y7qP\n\n## 进一步了解该项目，以及对HRNet代码的分析可参考我的bilibili\nhttps://www.bilibili.com/video/BV1ar4y157JM\n\n## HRNet网络结构图\n![HRNet.png](HRNet.png)\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/draw_utils.py",
    "content": "import numpy as np\nfrom numpy import ndarray\nimport PIL\nfrom PIL import ImageDraw, ImageFont\nfrom PIL.Image import Image\n\n# COCO 17 points\npoint_name = [\"nose\", \"left_eye\", \"right_eye\",\n              \"left_ear\", \"right_ear\",\n              \"left_shoulder\", \"right_shoulder\",\n              \"left_elbow\", \"right_elbow\",\n              \"left_wrist\", \"right_wrist\",\n              \"left_hip\", \"right_hip\",\n              \"left_knee\", \"right_knee\",\n              \"left_ankle\", \"right_ankle\"]\n\npoint_color = [(240, 2, 127), (240, 2, 127), (240, 2, 127),\n               (240, 2, 127), (240, 2, 127),\n               (255, 255, 51), (255, 255, 51),\n               (254, 153, 41), (44, 127, 184),\n               (217, 95, 14), (0, 0, 255),\n               (255, 255, 51), (255, 255, 51), (228, 26, 28),\n               (49, 163, 84), (252, 176, 243), (0, 176, 240),\n               (255, 255, 0), (169, 209, 142),\n               (255, 255, 0), (169, 209, 142),\n               (255, 255, 0), (169, 209, 142)]\n\n\ndef draw_keypoints(img: Image,\n                   keypoints: ndarray,\n                   scores: ndarray = None,\n                   thresh: float = 0.2,\n                   r: int = 2,\n                   draw_text: bool = False,\n                   font: str = 'arial.ttf',\n                   font_size: int = 10):\n    if isinstance(img, ndarray):\n        img = PIL.Image.fromarray(img)\n\n    if scores is None:\n        scores = np.ones(keypoints.shape[0])\n\n    if draw_text:\n        try:\n            font = ImageFont.truetype(font, font_size)\n        except IOError:\n            font = ImageFont.load_default()\n\n    draw = ImageDraw.Draw(img)\n    for i, (point, score) in enumerate(zip(keypoints, scores)):\n        if score > thresh and np.max(point) > 0:\n            draw.ellipse([point[0] - r, point[1] - r, point[0] + r, point[1] + r],\n                         fill=point_color[i],\n                     
    outline=(255, 255, 255))\n            if draw_text:\n                draw.text((point[0] + r, point[1] + r), text=point_name[i], font=font)\n\n    return img\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/model/__init__.py",
    "content": "from .hrnet import HighResolutionNet\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/model/hrnet.py",
    "content": "import torch.nn as nn\n\nBN_MOMENTUM = 0.1\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,\n                               padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)\n        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,\n                               bias=False)\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion,\n                                  momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = 
self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass StageModule(nn.Module):\n    def __init__(self, input_branches, output_branches, c):\n        \"\"\"\n        构建对应stage，即用来融合不同尺度的实现\n        :param input_branches: 输入的分支数，每个分支对应一种尺度\n        :param output_branches: 输出的分支数\n        :param c: 输入的第一个分支通道数\n        \"\"\"\n        super().__init__()\n        self.input_branches = input_branches\n        self.output_branches = output_branches\n\n        self.branches = nn.ModuleList()\n        for i in range(self.input_branches):  # 每个分支上都先通过4个BasicBlock\n            w = c * (2 ** i)  # 对应第i个分支的通道数\n            branch = nn.Sequential(\n                BasicBlock(w, w),\n                BasicBlock(w, w),\n                BasicBlock(w, w),\n                BasicBlock(w, w)\n            )\n            self.branches.append(branch)\n\n        self.fuse_layers = nn.ModuleList()  # 用于融合每个分支上的输出\n        for i in range(self.output_branches):\n            self.fuse_layers.append(nn.ModuleList())\n            for j in range(self.input_branches):\n                if i == j:\n                    # 当输入、输出为同一个分支时不做任何处理\n                    self.fuse_layers[-1].append(nn.Identity())\n                elif i < j:\n                    # 当输入分支j大于输出分支i时(即输入分支下采样率大于输出分支下采样率)，\n                    # 此时需要对输入分支j进行通道调整以及上采样，方便后续相加\n                    self.fuse_layers[-1].append(\n                        nn.Sequential(\n                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=1, stride=1, bias=False),\n                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM),\n                            nn.Upsample(scale_factor=2.0 ** (j - i), 
mode='nearest')\n                        )\n                    )\n                else:  # i > j\n                    # 当输入分支j小于输出分支i时(即输入分支下采样率小于输出分支下采样率)，\n                    # 此时需要对输入分支j进行通道调整以及下采样，方便后续相加\n                    # 注意，这里每次下采样2x都是通过一个3x3卷积层实现的，4x就是两个，8x就是三个，总共i-j个\n                    ops = []\n                    # 前i-j-1个卷积层不用变通道，只进行下采样\n                    for k in range(i - j - 1):\n                        ops.append(\n                            nn.Sequential(\n                                nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=3, stride=2, padding=1, bias=False),\n                                nn.BatchNorm2d(c * (2 ** j), momentum=BN_MOMENTUM),\n                                nn.ReLU(inplace=True)\n                            )\n                        )\n                    # 最后一个卷积层不仅要调整通道，还要进行下采样\n                    ops.append(\n                        nn.Sequential(\n                            nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=3, stride=2, padding=1, bias=False),\n                            nn.BatchNorm2d(c * (2 ** i), momentum=BN_MOMENTUM)\n                        )\n                    )\n                    self.fuse_layers[-1].append(nn.Sequential(*ops))\n\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        # 每个分支通过对应的block\n        x = [branch(xi) for branch, xi in zip(self.branches, x)]\n\n        # 接着融合不同尺寸信息\n        x_fused = []\n        for i in range(len(self.fuse_layers)):\n            x_fused.append(\n                self.relu(\n                    sum([self.fuse_layers[i][j](x[j]) for j in range(len(self.branches))])\n                )\n            )\n\n        return x_fused\n\n\nclass HighResolutionNet(nn.Module):\n    def __init__(self, base_channel: int = 32, num_joints: int = 17):\n        super().__init__()\n        # Stem\n        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(64, 
momentum=BN_MOMENTUM)\n        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)\n        self.relu = nn.ReLU(inplace=True)\n\n        # Stage1\n        downsample = nn.Sequential(\n            nn.Conv2d(64, 256, kernel_size=1, stride=1, bias=False),\n            nn.BatchNorm2d(256, momentum=BN_MOMENTUM)\n        )\n        self.layer1 = nn.Sequential(\n            Bottleneck(64, 64, downsample=downsample),\n            Bottleneck(256, 64),\n            Bottleneck(256, 64),\n            Bottleneck(256, 64)\n        )\n\n        self.transition1 = nn.ModuleList([\n            nn.Sequential(\n                nn.Conv2d(256, base_channel, kernel_size=3, stride=1, padding=1, bias=False),\n                nn.BatchNorm2d(base_channel, momentum=BN_MOMENTUM),\n                nn.ReLU(inplace=True)\n            ),\n            nn.Sequential(\n                nn.Sequential(  # 这里又使用一次Sequential是为了适配原项目中提供的权重\n                    nn.Conv2d(256, base_channel * 2, kernel_size=3, stride=2, padding=1, bias=False),\n                    nn.BatchNorm2d(base_channel * 2, momentum=BN_MOMENTUM),\n                    nn.ReLU(inplace=True)\n                )\n            )\n        ])\n\n        # Stage2\n        self.stage2 = nn.Sequential(\n            StageModule(input_branches=2, output_branches=2, c=base_channel)\n        )\n\n        # transition2\n        self.transition2 = nn.ModuleList([\n            nn.Identity(),  # None,  - Used in place of \"None\" because it is callable\n            nn.Identity(),  # None,  - Used in place of \"None\" because it is callable\n            nn.Sequential(\n                nn.Sequential(\n                    nn.Conv2d(base_channel * 2, base_channel * 4, kernel_size=3, stride=2, padding=1, bias=False),\n                    nn.BatchNorm2d(base_channel * 4, momentum=BN_MOMENTUM),\n                    nn.ReLU(inplace=True)\n                )\n            )\n     
   ])\n\n        # Stage3\n        self.stage3 = nn.Sequential(\n            StageModule(input_branches=3, output_branches=3, c=base_channel),\n            StageModule(input_branches=3, output_branches=3, c=base_channel),\n            StageModule(input_branches=3, output_branches=3, c=base_channel),\n            StageModule(input_branches=3, output_branches=3, c=base_channel)\n        )\n\n        # transition3\n        self.transition3 = nn.ModuleList([\n            nn.Identity(),  # None,  - Used in place of \"None\" because it is callable\n            nn.Identity(),  # None,  - Used in place of \"None\" because it is callable\n            nn.Identity(),  # None,  - Used in place of \"None\" because it is callable\n            nn.Sequential(\n                nn.Sequential(\n                    nn.Conv2d(base_channel * 4, base_channel * 8, kernel_size=3, stride=2, padding=1, bias=False),\n                    nn.BatchNorm2d(base_channel * 8, momentum=BN_MOMENTUM),\n                    nn.ReLU(inplace=True)\n                )\n            )\n        ])\n\n        # Stage4\n        # 注意，最后一个StageModule只输出分辨率最高的特征层\n        self.stage4 = nn.Sequential(\n            StageModule(input_branches=4, output_branches=4, c=base_channel),\n            StageModule(input_branches=4, output_branches=4, c=base_channel),\n            StageModule(input_branches=4, output_branches=1, c=base_channel)\n        )\n\n        # Final layer\n        self.final_layer = nn.Conv2d(base_channel, num_joints, kernel_size=1, stride=1)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.conv2(x)\n        x = self.bn2(x)\n        x = self.relu(x)\n\n        x = self.layer1(x)\n        x = [trans(x) for trans in self.transition1]  # Since now, x is a list\n\n        x = self.stage2(x)\n        x = [\n            self.transition2[0](x[0]),\n            self.transition2[1](x[1]),\n            self.transition2[2](x[-1])\n        ] 
 # New branch derives from the \"upper\" branch only\n\n        x = self.stage3(x)\n        x = [\n            self.transition3[0](x[0]),\n            self.transition3[1](x[1]),\n            self.transition3[2](x[2]),\n            self.transition3[3](x[-1]),\n        ]  # New branch derives from the \"upper\" branch only\n\n        x = self.stage4(x)\n\n        x = self.final_layer(x[0])\n\n        return x\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/my_dataset_coco.py",
    "content": "import os\nimport copy\n\nimport torch\nimport numpy as np\nimport cv2\nimport torch.utils.data as data\nfrom pycocotools.coco import COCO\n\n\nclass CocoKeypoint(data.Dataset):\n    def __init__(self,\n                 root,\n                 dataset=\"train\",\n                 years=\"2017\",\n                 transforms=None,\n                 det_json_path=None,\n                 fixed_size=(256, 192)):\n        super().__init__()\n        assert dataset in [\"train\", \"val\"], 'dataset must be in [\"train\", \"val\"]'\n        anno_file = f\"person_keypoints_{dataset}{years}.json\"\n        assert os.path.exists(root), \"file '{}' does not exist.\".format(root)\n        self.img_root = os.path.join(root, f\"{dataset}{years}\")\n        assert os.path.exists(self.img_root), \"path '{}' does not exist.\".format(self.img_root)\n        self.anno_path = os.path.join(root, \"annotations\", anno_file)\n        assert os.path.exists(self.anno_path), \"file '{}' does not exist.\".format(self.anno_path)\n\n        self.fixed_size = fixed_size\n        self.mode = dataset\n        self.transforms = transforms\n        self.coco = COCO(self.anno_path)\n        img_ids = list(sorted(self.coco.imgs.keys()))\n\n        if det_json_path is not None:\n            det = self.coco.loadRes(det_json_path)\n        else:\n            det = self.coco\n\n        self.valid_person_list = []\n        obj_idx = 0\n        for img_id in img_ids:\n            img_info = self.coco.loadImgs(img_id)[0]\n            ann_ids = det.getAnnIds(imgIds=img_id)\n            anns = det.loadAnns(ann_ids)\n            for ann in anns:\n                # only save person class\n                if ann[\"category_id\"] != 1:\n                    print(f'warning: find not support id: {ann[\"category_id\"]}, only support id: 1 (person)')\n                    continue\n\n                # COCO_val2017_detections_AP_H_56_person.json文件中只有det信息，没有keypoint信息，跳过检查\n                if 
det_json_path is None:\n                    # skip objs without keypoints annotation\n                    if \"keypoints\" not in ann:\n                        continue\n                    if max(ann[\"keypoints\"]) == 0:\n                        continue\n\n                xmin, ymin, w, h = ann['bbox']\n                # Use only valid bounding boxes\n                if w > 0 and h > 0:\n                    info = {\n                        \"box\": [xmin, ymin, w, h],\n                        \"image_path\": os.path.join(self.img_root, img_info[\"file_name\"]),\n                        \"image_id\": img_id,\n                        \"image_width\": img_info['width'],\n                        \"image_height\": img_info['height'],\n                        \"obj_origin_hw\": [h, w],\n                        \"obj_index\": obj_idx,\n                        \"score\": ann[\"score\"] if \"score\" in ann else 1.\n                    }\n\n                    # COCO_val2017_detections_AP_H_56_person.json文件中只有det信息，没有keypoint信息，跳过\n                    if det_json_path is None:\n                        keypoints = np.array(ann[\"keypoints\"]).reshape([-1, 3])\n                        visible = keypoints[:, 2]\n                        keypoints = keypoints[:, :2]\n                        info[\"keypoints\"] = keypoints\n                        info[\"visible\"] = visible\n\n                    self.valid_person_list.append(info)\n                    obj_idx += 1\n\n    def __getitem__(self, idx):\n        target = copy.deepcopy(self.valid_person_list[idx])\n\n        image = cv2.imread(target[\"image_path\"])\n        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n        if self.transforms is not None:\n            image, person_info = self.transforms(image, target)\n\n        return image, target\n\n    def __len__(self):\n        return len(self.valid_person_list)\n\n    @staticmethod\n    def collate_fn(batch):\n        imgs_tuple, targets_tuple = tuple(zip(*batch))\n 
       imgs_tensor = torch.stack(imgs_tuple)\n        return imgs_tensor, targets_tuple\n\n\nif __name__ == '__main__':\n    train = CocoKeypoint(\"/data/coco2017/\", dataset=\"val\")\n    print(len(train))\n    t = train[0]\n    print(t)\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/person_keypoints.json",
    "content": "{\n  \"keypoints\": [\"nose\",\"left_eye\",\"right_eye\",\"left_ear\",\"right_ear\",\"left_shoulder\",\"right_shoulder\",\"left_elbow\",\"right_elbow\",\"left_wrist\",\"right_wrist\",\"left_hip\",\"right_hip\",\"left_knee\",\"right_knee\",\"left_ankle\",\"right_ankle\"],\n  \"skeleton\": [[16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13],[6,7],[6,8],[7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]],\n  \"flip_pairs\": [[1,2], [3,4], [5,6], [7,8], [9,10], [11,12], [13,14], [15,16]],\n  \"kps_weights\": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5],\n  \"upper_body_ids\": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n  \"lower_body_ids\": [11, 12, 13, 14, 15, 16]\n}"
  },
  {
    "path": "pytorch_keypoint/HRNet/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"step\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/predict.py",
    "content": "import os\nimport json\n\nimport torch\nimport numpy as np\nimport cv2\nimport matplotlib.pyplot as plt\n\nfrom model import HighResolutionNet\nfrom draw_utils import draw_keypoints\nimport transforms\n\n\ndef predict_all_person():\n    # TODO\n    pass\n\n\ndef predict_single_person():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(f\"using device: {device}\")\n\n    flip_test = True\n    resize_hw = (256, 192)\n    img_path = \"./person.png\"\n    weights_path = \"./pose_hrnet_w32_256x192.pth\"\n    keypoint_json_path = \"person_keypoints.json\"\n    assert os.path.exists(img_path), f\"file: {img_path} does not exist.\"\n    assert os.path.exists(weights_path), f\"file: {weights_path} does not exist.\"\n    assert os.path.exists(keypoint_json_path), f\"file: {keypoint_json_path} does not exist.\"\n\n    data_transform = transforms.Compose([\n        transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=resize_hw),\n        transforms.ToTensor(),\n        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    ])\n\n    # read json file\n    with open(keypoint_json_path, \"r\") as f:\n        person_info = json.load(f)\n\n    # read single-person image\n    img = cv2.imread(img_path)\n    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n    img_tensor, target = data_transform(img, {\"box\": [0, 0, img.shape[1] - 1, img.shape[0] - 1]})\n    img_tensor = torch.unsqueeze(img_tensor, dim=0)\n\n    # create model\n    # HRNet-W32: base_channel=32\n    # HRNet-W48: base_channel=48\n    model = HighResolutionNet(base_channel=32)\n    weights = torch.load(weights_path, map_location=device)\n    weights = weights if \"model\" not in weights else weights[\"model\"]\n    model.load_state_dict(weights)\n    model.to(device)\n    model.eval()\n\n    with torch.inference_mode():\n        outputs = model(img_tensor.to(device))\n\n        if flip_test:\n            flip_tensor = 
transforms.flip_images(img_tensor)\n            flip_outputs = torch.squeeze(\n                transforms.flip_back(model(flip_tensor.to(device)), person_info[\"flip_pairs\"]),\n            )\n            # feature is not aligned, shift flipped heatmap for higher accuracy\n            # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22\n            flip_outputs[..., 1:] = flip_outputs.clone()[..., 0: -1]\n            outputs = (outputs + flip_outputs) * 0.5\n\n        keypoints, scores = transforms.get_final_preds(outputs, [target[\"reverse_trans\"]], True)\n        keypoints = np.squeeze(keypoints)\n        scores = np.squeeze(scores)\n\n        plot_img = draw_keypoints(img, keypoints, scores, thresh=0.2, r=3)\n        plt.imshow(plot_img)\n        plt.show()\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == '__main__':\n    predict_single_person()\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/requirements.txt",
    "content": "numpy\nopencv_python==4.5.4.60\nlxml\ntorch==1.10.1\ntorchvision==0.11.1\npycocotools\nmatplotlib\ntqdm"
  },
  {
    "path": "pytorch_keypoint/HRNet/train.py",
    "content": "import json\nimport os\nimport datetime\n\nimport torch\nfrom torch.utils import data\nimport numpy as np\n\nimport transforms\nfrom model import HighResolutionNet\nfrom my_dataset_coco import CocoKeypoint\nfrom train_utils import train_eval_utils as utils\n\n\ndef create_model(num_joints, load_pretrain_weights=True):\n    model = HighResolutionNet(base_channel=32, num_joints=num_joints)\n    \n    if load_pretrain_weights:\n        # 载入预训练模型权重\n        # 链接:https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw 提取码:f43o\n        weights_dict = torch.load(\"./hrnet_w32.pth\", map_location='cpu')\n\n        for k in list(weights_dict.keys()):\n            # 如果载入的是imagenet权重，就删除无用权重\n            if (\"head\" in k) or (\"fc\" in k):\n                del weights_dict[k]\n\n            # 如果载入的是coco权重，对比下num_joints，如果不相等就删除\n            if \"final_layer\" in k:\n                if weights_dict[k].shape[0] != num_joints:\n                    del weights_dict[k]\n\n        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    with open(args.keypoints_path, \"r\") as f:\n        person_kps_info = json.load(f)\n\n    fixed_size = args.fixed_size\n    heatmap_hw = (args.fixed_size[0] // 4, args.fixed_size[1] // 4)\n    kps_weights = np.array(person_kps_info[\"kps_weights\"],\n                           dtype=np.float32).reshape((args.num_joints,))\n    data_transform = {\n        \"train\": transforms.Compose([\n            transforms.HalfBody(0.3, person_kps_info[\"upper_body_ids\"], person_kps_info[\"lower_body_ids\"]),\n            
transforms.AffineTransform(scale=(0.65, 1.35), rotation=(-45, 45), fixed_size=fixed_size),\n            transforms.RandomHorizontalFlip(0.5, person_kps_info[\"flip_pairs\"]),\n            transforms.KeypointToHeatMap(heatmap_hw=heatmap_hw, gaussian_sigma=2, keypoints_weights=kps_weights),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ]),\n        \"val\": transforms.Compose([\n            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=fixed_size),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ])\n    }\n\n    data_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> person_keypoints_train2017.json\n    train_dataset = CocoKeypoint(data_root, \"train\", transforms=data_transform[\"train\"], fixed_size=args.fixed_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    train_data_loader = data.DataLoader(train_dataset,\n                                        batch_size=batch_size,\n                                        shuffle=True,\n                                        pin_memory=True,\n                                        num_workers=nw,\n                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # coco2017 -> annotations -> person_keypoints_val2017.json\n    val_dataset = CocoKeypoint(data_root, \"val\", transforms=data_transform[\"val\"], fixed_size=args.fixed_size,\n                               det_json_path=args.person_det)\n    val_data_loader = data.DataLoader(val_dataset,\n                                      batch_size=batch_size,\n                                      
shuffle=False,\n                                      pin_memory=True,\n                                      num_workers=nw,\n                                      collate_fn=val_dataset.collate_fn)\n\n    # create model\n    model = create_model(num_joints=args.num_joints)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.AdamW(params,\n                                  lr=args.lr,\n                                  weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 50 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50, warmup=True,\n                                              scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test 
dataset\n        coco_info = utils.evaluate(model, val_data_loader, device=device,\n                                   flip=True, flip_pairs=person_kps_info[\"flip_pairs\"])\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # @0.5 mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录(coco2017)\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # COCO数据集人体关键点信息\n    parser.add_argument('--keypoints-path', default=\"./person_keypoints.json\", type=str,\n                        help='person_keypoints.json path')\n    # 原项目提供的验证集person检测信息，如果要使用GT信息，直接将该参数置为None，建议设置成None\n    parser.add_argument('--person-det', type=str, default=None)\n    parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size')\n    # keypoints点数\n    
parser.add_argument('--num-joints', default=17, type=int, help='num_joints')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=210, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 学习率\n    parser.add_argument('--lr', default=0.001, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # AdamW的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练的batch size\n    parser.add_argument('--batch-size', default=32, type=int, metavar='N',\n                        help='batch size when training.')\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", action=\"store_true\", help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_multi_GPU.py",
    "content": "import json\nimport time\nimport os\nimport datetime\n\nimport torch\nfrom torch.utils import data\nimport numpy as np\n\nimport transforms\nfrom model import HighResolutionNet\nfrom my_dataset_coco import CocoKeypoint\nimport train_utils.train_eval_utils as utils\nfrom train_utils import init_distributed_mode, save_on_master, mkdir\n\n\ndef create_model(num_joints, load_pretrain_weights=True):\n    model = HighResolutionNet(base_channel=32, num_joints=num_joints)\n\n    if load_pretrain_weights:\n        # 载入预训练模型权重\n        # 链接:https://pan.baidu.com/s/1Lu6mMAWfm_8GGykttFMpVw 提取码:f43o\n        weights_dict = torch.load(\"./hrnet_w32.pth\", map_location='cpu')\n\n        for k in list(weights_dict.keys()):\n            # 如果载入的是imagenet权重，就删除无用权重\n            if (\"head\" in k) or (\"fc\" in k):\n                del weights_dict[k]\n\n            # 如果载入的是coco权重，对比下num_joints，如果不相等就删除\n            if \"final_layer\" in k:\n                if weights_dict[k].shape[0] != num_joints:\n                    del weights_dict[k]\n\n        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存coco_info的文件\n    now = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    key_results_file = f\"results{now}.txt\"\n\n    with open(args.keypoints_path, \"r\") as f:\n        person_kps_info = json.load(f)\n\n    fixed_size = args.fixed_size\n    heatmap_hw = (args.fixed_size[0] // 4, args.fixed_size[1] // 4)\n    kps_weights = np.array(person_kps_info[\"kps_weights\"],\n                           dtype=np.float32).reshape((args.num_joints,))\n    data_transform = {\n        \"train\": transforms.Compose([\n            transforms.HalfBody(0.3, person_kps_info[\"upper_body_ids\"], 
person_kps_info[\"lower_body_ids\"]),\n            transforms.AffineTransform(scale=(0.65, 1.35), rotation=(-45, 45), fixed_size=fixed_size),\n            transforms.RandomHorizontalFlip(0.5, person_kps_info[\"flip_pairs\"]),\n            transforms.KeypointToHeatMap(heatmap_hw=heatmap_hw, gaussian_sigma=2, keypoints_weights=kps_weights),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ]),\n        \"val\": transforms.Compose([\n            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=fixed_size),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ])\n    }\n\n    data_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> person_keypoints_train2017.json\n    train_dataset = CocoKeypoint(data_root, \"train\", transforms=data_transform[\"train\"], fixed_size=args.fixed_size)\n\n    # load validation data set\n    # coco2017 -> annotations -> person_keypoints_val2017.json\n    val_dataset = CocoKeypoint(data_root, \"val\", transforms=data_transform[\"val\"], fixed_size=args.fixed_size,\n                               det_json_path=args.person_det)\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = data.distributed.DistributedSampler(train_dataset)\n        test_sampler = data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = data.RandomSampler(train_dataset)\n        test_sampler = data.SequentialSampler(val_dataset)\n\n    train_batch_sampler = data.BatchSampler(train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = data.DataLoader(train_dataset,\n                                  batch_sampler=train_batch_sampler,\n                                  num_workers=args.workers,\n                                  collate_fn=train_dataset.collate_fn)\n\n    data_loader_test = 
data.DataLoader(val_dataset,\n                                       batch_size=args.batch_size,\n                                       sampler=test_sampler,\n                                       num_workers=args.workers,\n                                       collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + classes\n    model = create_model(num_joints=args.num_joints)\n    model.to(device)\n\n    if args.distributed and args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.AdamW(params,\n                                  lr=args.lr,\n                                  weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        utils.evaluate(model, 
data_loader_test, device=device,\n                       flip=True, flip_pairs=person_kps_info[\"flip_pairs\"])\n        return\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,\n                                              device, epoch, args.print_freq,\n                                              warmup=True, scaler=scaler)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        key_info = utils.evaluate(model, data_loader_test, device=device,\n                                  flip=True, flip_pairs=person_kps_info[\"flip_pairs\"])\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            train_loss.append(mean_loss.item())\n            learning_rate.append(lr)\n            val_map.append(key_info[1])  # @0.5 mAP\n\n            # write into txt\n            with open(key_results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in key_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n        if args.output_dir:\n            # 只在主进程上执行保存权重操作\n            save_files = {'model': model_without_ddp.state_dict(),\n                          'optimizer': optimizer.state_dict(),\n                          'lr_scheduler': lr_scheduler.state_dict(),\n                          'args': args,\n                          'epoch': epoch}\n            if args.amp:\n                save_files[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_files,\n                           os.path.join(args.output_dir, 
f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(coco2017)\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # COCO数据集人体关键点信息\n    parser.add_argument('--keypoints-path', default=\"./person_keypoints.json\", type=str,\n                        help='person_keypoints.json path')\n    # 原项目提供的验证集person检测信息，如果要使用GT信息，直接将该参数置为None，建议设置成None\n    parser.add_argument('--person-det', type=str, default=None)\n    parser.add_argument('--fixed-size', default=[256, 192], nargs='+', type=int, help='input size')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-joints', default=17, type=int, help='num_joints(num_keypoints)')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=32, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=210, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data 
loading workers (default: 4)')\n    # 学习率\n    parser.add_argument('--lr', default=0.001, type=float,\n                        help='initial learning rate, 0.001 is the default value for training '\n                             'on 4 gpus and 32 images_per_gpu')\n    # AdamW的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[170, 200], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--test-only', action=\"store_true\", help=\"test only\")\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    parser.add_argument(\"--sync-bn\", action=\"store_true\", help=\"Use sync batch norm\")\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", action=\"store_true\", help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/__init__.py",
    "content": "from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .coco_eval import EvalCOCOMetric\nfrom .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/coco_eval.py",
    "content": "import json\nimport copy\n\nfrom PIL import Image, ImageDraw\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\nfrom .distributed_utils import all_gather, is_main_process\nfrom transforms import affine_points\n\n\ndef merge(img_ids, eval_results):\n    \"\"\"将多个进程之间的数据汇总在一起\"\"\"\n    all_img_ids = all_gather(img_ids)\n    all_eval_results = all_gather(eval_results)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_results = []\n    for p in all_eval_results:\n        merged_eval_results.extend(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n\n    # keep only unique (and in sorted order) images\n    # 去除重复的图片索引，多GPU训练时为了保证每个进程的训练图片数量相同，可能将一张图片分配给多个进程\n    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)\n    merged_eval_results = [merged_eval_results[i] for i in idx]\n\n    return list(merged_img_ids), merged_eval_results\n\n\nclass EvalCOCOMetric:\n    def __init__(self,\n                 coco: COCO = None,\n                 iou_type: str = \"keypoints\",\n                 results_file_name: str = \"predict_results.json\",\n                 classes_mapping: dict = None,\n                 threshold: float = 0.2):\n        self.coco = copy.deepcopy(coco)\n        self.obj_ids = []  # 记录每个进程处理目标(person)的ids\n        self.results = []\n        self.aggregation_results = None\n        self.classes_mapping = classes_mapping\n        self.coco_evaluator = None\n        assert iou_type in [\"keypoints\"]\n        self.iou_type = iou_type\n        self.results_file_name = results_file_name\n        self.threshold = threshold\n\n    def plot_img(self, img_path, keypoints, r=3):\n        img = Image.open(img_path)\n        draw = ImageDraw.Draw(img)\n        for i, point in enumerate(keypoints):\n            draw.ellipse([point[0] - r, point[1] - r, point[0] + r, point[1] + r],\n                         fill=(255, 0, 0))\n        
img.show()\n\n    def prepare_for_coco_keypoints(self, targets, outputs):\n        # 遍历每个person的预测结果(注意这里不是每张，一张图片里可能有多个person)\n        for target, keypoints, scores in zip(targets, outputs[0], outputs[1]):\n            if len(keypoints) == 0:\n                continue\n\n            obj_idx = int(target[\"obj_index\"])\n            if obj_idx in self.obj_ids:\n                # 防止出现重复的数据\n                continue\n\n            self.obj_ids.append(obj_idx)\n            # self.plot_img(target[\"image_path\"], keypoints)\n\n            mask = np.greater(scores, 0.2)\n            if mask.sum() == 0:\n                k_score = 0\n            else:\n                k_score = np.mean(scores[mask])\n\n            keypoints = np.concatenate([keypoints, scores], axis=1)\n            keypoints = np.reshape(keypoints, -1)\n\n            # We recommend rounding coordinates to the nearest tenth of a pixel\n            # to reduce resulting JSON file size.\n            keypoints = [round(k, 2) for k in keypoints.tolist()]\n\n            res = {\"image_id\": target[\"image_id\"],\n                   \"category_id\": 1,  # person\n                   \"keypoints\": keypoints,\n                   \"score\": target[\"score\"] * k_score}\n\n            self.results.append(res)\n\n    def update(self, targets, outputs):\n        if self.iou_type == \"keypoints\":\n            self.prepare_for_coco_keypoints(targets, outputs)\n        else:\n            raise KeyError(f\"not support iou_type: {self.iou_type}\")\n\n    def synchronize_results(self):\n        # 同步所有进程中的数据\n        eval_ids, eval_results = merge(self.obj_ids, self.results)\n        self.aggregation_results = {\"obj_ids\": eval_ids, \"results\": eval_results}\n\n        # 主进程上保存即可\n        if is_main_process():\n            # results = []\n            # [results.extend(i) for i in eval_results]\n            # write predict results into json file\n            json_str = json.dumps(eval_results, indent=4)\n            with 
open(self.results_file_name, 'w') as json_file:\n                json_file.write(json_str)\n\n    def evaluate(self):\n        # 只在主进程上评估即可\n        if is_main_process():\n            # accumulate predictions from all images\n            coco_true = self.coco\n            coco_pre = coco_true.loadRes(self.results_file_name)\n\n            self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)\n\n            self.coco_evaluator.evaluate()\n            self.coco_evaluator.accumulate()\n            print(f\"IoU metric: {self.iou_type}\")\n            self.coco_evaluator.summarize()\n\n            coco_info = self.coco_evaluator.stats.tolist()  # numpy to list\n            return coco_info\n        else:\n            return None\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/coco_utils.py",
    "content": "import torch\nimport torch.utils.data\nfrom pycocotools import mask as coco_mask\nfrom pycocotools.coco import COCO\n\n\ndef coco_remove_images_without_annotations(dataset, ids):\n    \"\"\"\n    删除coco数据集中没有目标，或者目标面积非常小的数据\n    refer to:\n    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py\n    :param dataset:\n    :param cat_list:\n    :return:\n    \"\"\"\n    def _has_only_empty_bbox(anno):\n        return all(any(o <= 1 for o in obj[\"bbox\"][2:]) for obj in anno)\n\n    def _has_valid_annotation(anno):\n        # if it's empty, there is no annotation\n        if len(anno) == 0:\n            return False\n        # if all boxes have close to zero area, there is no annotation\n        if _has_only_empty_bbox(anno):\n            return False\n\n        return True\n\n    valid_ids = []\n    for ds_idx, img_id in enumerate(ids):\n        ann_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=None)\n        anno = dataset.loadAnns(ann_ids)\n\n        if _has_valid_annotation(anno):\n            valid_ids.append(img_id)\n\n    return valid_ids\n\n\ndef convert_coco_poly_mask(segmentations, height, width):\n    masks = []\n    for polygons in segmentations:\n        rles = coco_mask.frPyObjects(polygons, height, width)\n        mask = coco_mask.decode(rles)\n        if len(mask.shape) < 3:\n            mask = mask[..., None]\n        mask = torch.as_tensor(mask, dtype=torch.uint8)\n        mask = mask.any(dim=2)\n        masks.append(mask)\n    if masks:\n        masks = torch.stack(masks, dim=0)\n    else:\n        # 如果mask为空，则说明没有目标，直接返回数值为0的mask\n        masks = torch.zeros((0, height, width), dtype=torch.uint8)\n    return masks\n\n\ndef convert_to_coco_api(self):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0, see torchvision issue #1530\n    ann_id = 1\n    dataset = {\"images\": [], \"categories\": [], \"annotations\": []}\n    categories = set()\n    for img_idx in range(len(self)):\n        
targets, h, w = self.get_annotations(img_idx)\n        img_id = targets[\"image_id\"].item()\n        img_dict = {\"id\": img_id,\n                    \"height\": h,\n                    \"width\": w}\n        dataset[\"images\"].append(img_dict)\n        bboxes = targets[\"boxes\"].clone()\n        # convert (x_min, ymin, xmax, ymax) to (xmin, ymin, w, h)\n        bboxes[:, 2:] -= bboxes[:, :2]\n        bboxes = bboxes.tolist()\n        labels = targets[\"labels\"].tolist()\n        areas = targets[\"area\"].tolist()\n        iscrowd = targets[\"iscrowd\"].tolist()\n        if \"masks\" in targets:\n            masks = targets[\"masks\"]\n            # make masks Fortran contiguous for coco_mask\n            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)\n        num_objs = len(bboxes)\n        for i in range(num_objs):\n            ann = {\"image_id\": img_id,\n                   \"bbox\": bboxes[i],\n                   \"category_id\": labels[i],\n                   \"area\": areas[i],\n                   \"iscrowd\": iscrowd[i],\n                   \"id\": ann_id}\n            categories.add(labels[i])\n            if \"masks\" in targets:\n                ann[\"segmentation\"] = coco_mask.encode(masks[i].numpy())\n            dataset[\"annotations\"].append(ann)\n            ann_id += 1\n    dataset[\"categories\"] = [{\"id\": i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    coco_ds.createIndex()\n    return coco_ds\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    收集各个进程中的数据\n    Run all_gather on arbitrary 
picklable data (not necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()  # 进程数\n    if world_size == 1:\n        return [data]\n\n    data_list = [None] * world_size\n    dist.all_gather_object(data_list, data)\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n    
        type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = 
int(iter_time.global_avg * (len(iterable) - i))\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as 
__builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k+1等份\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/loss.py",
    "content": "import torch\n\n\nclass KpLoss(object):\n    def __init__(self):\n        self.criterion = torch.nn.MSELoss(reduction='none')\n\n    def __call__(self, logits, targets):\n        assert len(logits.shape) == 4, 'logits should be 4-ndim'\n        device = logits.device\n        bs = logits.shape[0]\n        # [num_kps, H, W] -> [B, num_kps, H, W]\n        heatmaps = torch.stack([t[\"heatmap\"].to(device) for t in targets])\n        # [num_kps] -> [B, num_kps]\n        kps_weights = torch.stack([t[\"kps_weights\"].to(device) for t in targets])\n\n        # [B, num_kps, H, W] -> [B, num_kps]\n        loss = self.criterion(logits, heatmaps).mean(dim=[2, 3])\n        loss = torch.sum(loss * kps_weights) / bs\n        return loss\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nimport transforms\nimport train_utils.distributed_utils as utils\nfrom .coco_eval import EvalCOCOMetric\nfrom .loss import KpLoss\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mse = KpLoss()\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        images = torch.stack([image.to(device) for image in images])\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            results = model(images)\n\n            losses = mse(results, targets)\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict({\"losses\": losses})\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n\n        loss_value = losses_reduced.item()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(loss_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(losses).backward()\n            scaler.step(optimizer)\n            scaler.update()\n   
     else:\n            losses.backward()\n            optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        metric_logger.update(loss=losses_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, flip=False, flip_pairs=None):\n    if flip:\n        assert flip_pairs is not None, \"enable flip must provide flip_pairs.\"\n\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    key_metric = EvalCOCOMetric(data_loader.dataset.coco, \"keypoints\", \"key_results.json\")\n    for image, targets in metric_logger.log_every(data_loader, 100, header):\n        images = torch.stack([img.to(device) for img in image])\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        outputs = model(images)\n        if flip:\n            flipped_images = transforms.flip_images(images)\n            flipped_outputs = model(flipped_images)\n            flipped_outputs = transforms.flip_back(flipped_outputs, flip_pairs)\n            # feature is not aligned, shift flipped heatmap for higher accuracy\n            # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22\n            flipped_outputs[..., 1:] = flipped_outputs.clone()[..., 0:-1]\n            outputs = (outputs + flipped_outputs) * 0.5\n\n        model_time = time.time() - model_time\n\n        # decode keypoint\n        reverse_trans = [t[\"reverse_trans\"] for t in targets]\n        outputs = transforms.get_final_preds(outputs, reverse_trans, post_processing=True)\n\n        key_metric.update(targets, outputs)\n        metric_logger.update(model_time=model_time)\n\n    # gather the stats from all processes\n    
metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n\n    # 同步所有进程中的数据\n    key_metric.synchronize_results()\n\n    if utils.is_main_process():\n        coco_info = key_metric.evaluate()\n    else:\n        coco_info = None\n\n    return coco_info\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/transforms.py",
    "content": "import math\nimport random\nfrom typing import Tuple\n\nimport cv2\nimport numpy as np\nimport torch\nfrom torchvision.transforms import functional as F\nimport matplotlib.pyplot as plt\n\n\ndef flip_images(img):\n    assert len(img.shape) == 4, 'images has to be [batch_size, channels, height, width]'\n    img = torch.flip(img, dims=[3])\n    return img\n\n\ndef flip_back(output_flipped, matched_parts):\n    assert len(output_flipped.shape) == 4, 'output_flipped has to be [batch_size, num_joints, height, width]'\n    output_flipped = torch.flip(output_flipped, dims=[3])\n\n    for pair in matched_parts:\n        tmp = output_flipped[:, pair[0]].clone()\n        output_flipped[:, pair[0]] = output_flipped[:, pair[1]]\n        output_flipped[:, pair[1]] = tmp\n\n    return output_flipped\n\n\ndef get_max_preds(batch_heatmaps):\n    \"\"\"\n    get predictions from score maps\n    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])\n    \"\"\"\n    assert isinstance(batch_heatmaps, torch.Tensor), 'batch_heatmaps should be torch.Tensor'\n    assert len(batch_heatmaps.shape) == 4, 'batch_images should be 4-ndim'\n\n    batch_size, num_joints, h, w = batch_heatmaps.shape\n    heatmaps_reshaped = batch_heatmaps.reshape(batch_size, num_joints, -1)\n    maxvals, idx = torch.max(heatmaps_reshaped, dim=2)\n\n    maxvals = maxvals.unsqueeze(dim=-1)\n    idx = idx.float()\n\n    preds = torch.zeros((batch_size, num_joints, 2)).to(batch_heatmaps)\n\n    preds[:, :, 0] = idx % w  # column 对应最大值的x坐标\n    preds[:, :, 1] = torch.floor(idx / w)  # row 对应最大值的y坐标\n\n    pred_mask = torch.gt(maxvals, 0.0).repeat(1, 1, 2).float().to(batch_heatmaps.device)\n\n    preds *= pred_mask\n    return preds, maxvals\n\n\ndef affine_points(pt, t):\n    ones = np.ones((pt.shape[0], 1), dtype=float)\n    pt = np.concatenate([pt, ones], axis=1).T\n    new_pt = np.dot(t, pt)\n    return new_pt.T\n\n\ndef get_final_preds(batch_heatmaps: torch.Tensor,\n                    
trans: list = None,\n                    post_processing: bool = False):\n    assert trans is not None\n    coords, maxvals = get_max_preds(batch_heatmaps)\n\n    heatmap_height = batch_heatmaps.shape[2]\n    heatmap_width = batch_heatmaps.shape[3]\n\n    # post-processing\n    if post_processing:\n        for n in range(coords.shape[0]):\n            for p in range(coords.shape[1]):\n                hm = batch_heatmaps[n][p]\n                px = int(math.floor(coords[n][p][0] + 0.5))\n                py = int(math.floor(coords[n][p][1] + 0.5))\n                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:\n                    diff = torch.tensor(\n                        [\n                            hm[py][px + 1] - hm[py][px - 1],\n                            hm[py + 1][px] - hm[py - 1][px]\n                        ]\n                    ).to(batch_heatmaps.device)\n                    coords[n][p] += torch.sign(diff) * .25\n\n    preds = coords.clone().cpu().numpy()\n\n    # Transform back\n    for i in range(coords.shape[0]):\n        preds[i] = affine_points(preds[i], trans[i])\n\n    return preds, maxvals.cpu().numpy()\n\n\ndef decode_keypoints(outputs, origin_hw, num_joints: int = 17):\n    keypoints = []\n    scores = []\n    heatmap_h, heatmap_w = outputs.shape[-2:]\n    for i in range(num_joints):\n        pt = np.unravel_index(np.argmax(outputs[i]), (heatmap_h, heatmap_w))\n        score = outputs[i, pt[0], pt[1]]\n        keypoints.append(pt[::-1])  # hw -> wh(xy)\n        scores.append(score)\n\n    keypoints = np.array(keypoints, dtype=float)\n    scores = np.array(scores, dtype=float)\n    # convert to full image scale\n    keypoints[:, 0] = np.clip(keypoints[:, 0] / heatmap_w * origin_hw[1],\n                              a_min=0,\n                              a_max=origin_hw[1])\n    keypoints[:, 1] = np.clip(keypoints[:, 1] / heatmap_h * origin_hw[0],\n                              a_min=0,\n                              
a_max=origin_hw[0])\n    return keypoints, scores\n\n\ndef resize_pad(img: np.ndarray, size: tuple):\n    h, w, c = img.shape\n    src = np.array([[0, 0],       # 原坐标系中图像左上角点\n                    [w - 1, 0],   # 原坐标系中图像右上角点\n                    [0, h - 1]],  # 原坐标系中图像左下角点\n                   dtype=np.float32)\n    dst = np.zeros((3, 2), dtype=np.float32)\n    if h / w > size[0] / size[1]:\n        # 需要在w方向padding\n        wi = size[0] * (w / h)\n        pad_w = (size[1] - wi) / 2\n        dst[0, :] = [pad_w - 1, 0]            # 目标坐标系中图像左上角点\n        dst[1, :] = [size[1] - pad_w - 1, 0]  # 目标坐标系中图像右上角点\n        dst[2, :] = [pad_w - 1, size[0] - 1]  # 目标坐标系中图像左下角点\n    else:\n        # 需要在h方向padding\n        hi = size[1] * (h / w)\n        pad_h = (size[0] - hi) / 2\n        dst[0, :] = [0, pad_h - 1]            # 目标坐标系中图像左上角点\n        dst[1, :] = [size[1] - 1, pad_h - 1]  # 目标坐标系中图像右上角点\n        dst[2, :] = [0, size[0] - pad_h - 1]  # 目标坐标系中图像左下角点\n\n    trans = cv2.getAffineTransform(src, dst)  # 计算正向仿射变换矩阵\n    # 对图像进行仿射变换\n    resize_img = cv2.warpAffine(img,\n                                trans,\n                                size[::-1],  # w, h\n                                flags=cv2.INTER_LINEAR)\n    # import matplotlib.pyplot as plt\n    # plt.imshow(resize_img)\n    # plt.show()\n\n    dst /= 4  # 网络预测的heatmap尺寸是输入图像的1/4\n    reverse_trans = cv2.getAffineTransform(dst, src)  # 计算逆向仿射变换矩阵，方便后续还原\n\n    return resize_img, reverse_trans\n\n\ndef adjust_box(xmin: float, ymin: float, w: float, h: float, fixed_size: Tuple[float, float]):\n    \"\"\"通过增加w或者h的方式保证输入图片的长宽比固定\"\"\"\n    xmax = xmin + w\n    ymax = ymin + h\n\n    hw_ratio = fixed_size[0] / fixed_size[1]\n    if h / w > hw_ratio:\n        # 需要在w方向padding\n        wi = h / hw_ratio\n        pad_w = (wi - w) / 2\n        xmin = xmin - pad_w\n        xmax = xmax + pad_w\n    else:\n        # 需要在h方向padding\n        hi = w * hw_ratio\n        pad_h = (hi - h) / 2\n        ymin = ymin - pad_h\n       
 ymax = ymax + pad_h\n\n    return xmin, ymin, xmax, ymax\n\n\ndef scale_box(xmin: float, ymin: float, w: float, h: float, scale_ratio: Tuple[float, float]):\n    \"\"\"根据传入的h、w缩放因子scale_ratio，重新计算xmin，ymin，w，h\"\"\"\n    s_h = h * scale_ratio[0]\n    s_w = w * scale_ratio[1]\n    xmin = xmin - (s_w - w) / 2.\n    ymin = ymin - (s_h - h) / 2.\n    return xmin, ymin, s_w, s_h\n\n\ndef plot_heatmap(image, heatmap, kps, kps_weights):\n    for kp_id in range(len(kps_weights)):\n        if kps_weights[kp_id] > 0:\n            plt.subplot(1, 2, 1)\n            plt.imshow(image)\n            plt.plot(*kps[kp_id].tolist(), \"ro\")\n            plt.title(\"image\")\n            plt.subplot(1, 2, 2)\n            plt.imshow(heatmap[kp_id], cmap=plt.cm.Blues)\n            plt.colorbar(ticks=[0, 1])\n            plt.title(f\"kp_id: {kp_id}\")\n            plt.show()\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean=None, std=None):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n\n\nclass HalfBody(object):\n    def __init__(self, p: float = 0.3, upper_body_ids=None, lower_body_ids=None):\n        assert upper_body_ids is not None\n        assert lower_body_ids is not None\n        self.p = p\n        self.upper_body_ids = upper_body_ids\n        self.lower_body_ids = lower_body_ids\n\n    def __call__(self, image, target):\n        if random.random() < self.p:\n            kps = 
target[\"keypoints\"]\n            vis = target[\"visible\"]\n            upper_kps = []\n            lower_kps = []\n\n            # 对可见的keypoints进行归类\n            for i, v in enumerate(vis):\n                if v > 0.5:\n                    if i in self.upper_body_ids:\n                        upper_kps.append(kps[i])\n                    else:\n                        lower_kps.append(kps[i])\n\n            # 50%的概率选择上或下半身\n            if random.random() < 0.5:\n                selected_kps = upper_kps\n            else:\n                selected_kps = lower_kps\n\n            # 如果点数太少就不做任何处理\n            if len(selected_kps) > 2:\n                selected_kps = np.array(selected_kps, dtype=np.float32)\n                xmin, ymin = np.min(selected_kps, axis=0).tolist()\n                xmax, ymax = np.max(selected_kps, axis=0).tolist()\n                w = xmax - xmin\n                h = ymax - ymin\n                if w > 1 and h > 1:\n                    # 把w和h适当放大点，要不然关键点处于边缘位置\n                    xmin, ymin, w, h = scale_box(xmin, ymin, w, h, (1.5, 1.5))\n                    target[\"box\"] = [xmin, ymin, w, h]\n\n        return image, target\n\n\nclass AffineTransform(object):\n    \"\"\"scale+rotation\"\"\"\n    def __init__(self,\n                 scale: Tuple[float, float] = None,  # e.g. (0.65, 1.35)\n                 rotation: Tuple[int, int] = None,   # e.g. 
(-45, 45)\n                 fixed_size: Tuple[int, int] = (256, 192)):\n        self.scale = scale\n        self.rotation = rotation\n        self.fixed_size = fixed_size\n\n    def __call__(self, img, target):\n        src_xmin, src_ymin, src_xmax, src_ymax = adjust_box(*target[\"box\"], fixed_size=self.fixed_size)\n        src_w = src_xmax - src_xmin\n        src_h = src_ymax - src_ymin\n        src_center = np.array([(src_xmin + src_xmax) / 2, (src_ymin + src_ymax) / 2])\n        src_p2 = src_center + np.array([0, -src_h / 2])  # top middle\n        src_p3 = src_center + np.array([src_w / 2, 0])   # right middle\n\n        dst_center = np.array([(self.fixed_size[1] - 1) / 2, (self.fixed_size[0] - 1) / 2])\n        dst_p2 = np.array([(self.fixed_size[1] - 1) / 2, 0])  # top middle\n        dst_p3 = np.array([self.fixed_size[1] - 1, (self.fixed_size[0] - 1) / 2])  # right middle\n\n        if self.scale is not None:\n            scale = random.uniform(*self.scale)\n            src_w = src_w * scale\n            src_h = src_h * scale\n            src_p2 = src_center + np.array([0, -src_h / 2])  # top middle\n            src_p3 = src_center + np.array([src_w / 2, 0])   # right middle\n\n        if self.rotation is not None:\n            angle = random.randint(*self.rotation)  # 角度制\n            angle = angle / 180 * math.pi  # 弧度制\n            src_p2 = src_center + np.array([src_h / 2 * math.sin(angle), -src_h / 2 * math.cos(angle)])\n            src_p3 = src_center + np.array([src_w / 2 * math.cos(angle), src_w / 2 * math.sin(angle)])\n\n        src = np.stack([src_center, src_p2, src_p3]).astype(np.float32)\n        dst = np.stack([dst_center, dst_p2, dst_p3]).astype(np.float32)\n\n        trans = cv2.getAffineTransform(src, dst)  # 计算正向仿射变换矩阵\n        dst /= 4  # 网络预测的heatmap尺寸是输入图像的1/4\n        reverse_trans = cv2.getAffineTransform(dst, src)  # 计算逆向仿射变换矩阵，方便后续还原\n\n        # 对图像进行仿射变换\n        resize_img = cv2.warpAffine(img,\n                                   
 trans,\n                                    tuple(self.fixed_size[::-1]),  # [w, h]\n                                    flags=cv2.INTER_LINEAR)\n\n        if \"keypoints\" in target:\n            kps = target[\"keypoints\"]\n            mask = np.logical_and(kps[:, 0] != 0, kps[:, 1] != 0)\n            kps[mask] = affine_points(kps[mask], trans)\n            target[\"keypoints\"] = kps\n\n        # import matplotlib.pyplot as plt\n        # from draw_utils import draw_keypoints\n        # resize_img = draw_keypoints(resize_img, target[\"keypoints\"])\n        # plt.imshow(resize_img)\n        # plt.show()\n\n        target[\"trans\"] = trans\n        target[\"reverse_trans\"] = reverse_trans\n        return resize_img, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机对输入图片进行水平翻转，注意该方法必须接在 AffineTransform 后\"\"\"\n    def __init__(self, p: float = 0.5, matched_parts: list = None):\n        assert matched_parts is not None\n        self.p = p\n        self.matched_parts = matched_parts\n\n    def __call__(self, image, target):\n        if random.random() < self.p:\n            # [h, w, c]\n            image = np.ascontiguousarray(np.flip(image, axis=[1]))\n            keypoints = target[\"keypoints\"]\n            visible = target[\"visible\"]\n            width = image.shape[1]\n\n            # Flip horizontal\n            keypoints[:, 0] = width - keypoints[:, 0] - 1\n\n            # Change left-right parts\n            for pair in self.matched_parts:\n                keypoints[pair[0], :], keypoints[pair[1], :] = \\\n                    keypoints[pair[1], :], keypoints[pair[0], :].copy()\n\n                visible[pair[0]], visible[pair[1]] = \\\n                    visible[pair[1]], visible[pair[0]].copy()\n\n            target[\"keypoints\"] = keypoints\n            target[\"visible\"] = visible\n\n        return image, target\n\n\nclass KeypointToHeatMap(object):\n    def __init__(self,\n                 heatmap_hw: Tuple[int, int] = (256 // 4, 
192 // 4),\n                 gaussian_sigma: int = 2,\n                 keypoints_weights=None):\n        self.heatmap_hw = heatmap_hw\n        self.sigma = gaussian_sigma\n        self.kernel_radius = self.sigma * 3\n        self.use_kps_weights = False if keypoints_weights is None else True\n        self.kps_weights = keypoints_weights\n\n        # generate gaussian kernel(not normalized)\n        kernel_size = 2 * self.kernel_radius + 1\n        kernel = np.zeros((kernel_size, kernel_size), dtype=np.float32)\n        x_center = y_center = kernel_size // 2\n        for x in range(kernel_size):\n            for y in range(kernel_size):\n                kernel[y, x] = np.exp(-((x - x_center) ** 2 + (y - y_center) ** 2) / (2 * self.sigma ** 2))\n        # print(kernel)\n\n        self.kernel = kernel\n\n    def __call__(self, image, target):\n        kps = target[\"keypoints\"]\n        num_kps = kps.shape[0]\n        kps_weights = np.ones((num_kps,), dtype=np.float32)\n        if \"visible\" in target:\n            visible = target[\"visible\"]\n            kps_weights = visible\n\n        heatmap = np.zeros((num_kps, self.heatmap_hw[0], self.heatmap_hw[1]), dtype=np.float32)\n        heatmap_kps = (kps / 4 + 0.5).astype(np.int)  # round\n        for kp_id in range(num_kps):\n            v = kps_weights[kp_id]\n            if v < 0.5:\n                # 如果该点的可见度很低，则直接忽略\n                continue\n\n            x, y = heatmap_kps[kp_id]\n            ul = [x - self.kernel_radius, y - self.kernel_radius]  # up-left x,y\n            br = [x + self.kernel_radius, y + self.kernel_radius]  # bottom-right x,y\n            # 如果以xy为中心kernel_radius为半径的辐射范围内与heatmap没交集，则忽略该点(该规则并不严格)\n            if ul[0] > self.heatmap_hw[1] - 1 or \\\n                    ul[1] > self.heatmap_hw[0] - 1 or \\\n                    br[0] < 0 or \\\n                    br[1] < 0:\n                # If not, just return the image as is\n                kps_weights[kp_id] = 0\n                
continue\n\n            # Usable gaussian range\n            # 计算高斯核有效区域（高斯核坐标系）\n            g_x = (max(0, -ul[0]), min(br[0], self.heatmap_hw[1] - 1) - ul[0])\n            g_y = (max(0, -ul[1]), min(br[1], self.heatmap_hw[0] - 1) - ul[1])\n            # image range\n            # 计算heatmap中的有效区域（heatmap坐标系）\n            img_x = (max(0, ul[0]), min(br[0], self.heatmap_hw[1] - 1))\n            img_y = (max(0, ul[1]), min(br[1], self.heatmap_hw[0] - 1))\n\n            if kps_weights[kp_id] > 0.5:\n                # 将高斯核有效区域复制到heatmap对应区域\n                heatmap[kp_id][img_y[0]:img_y[1] + 1, img_x[0]:img_x[1] + 1] = \\\n                    self.kernel[g_y[0]:g_y[1] + 1, g_x[0]:g_x[1] + 1]\n\n        if self.use_kps_weights:\n            kps_weights = np.multiply(kps_weights, self.kps_weights)\n\n        # plot_heatmap(image, heatmap, kps, kps_weights)\n\n        target[\"heatmap\"] = torch.as_tensor(heatmap, dtype=torch.float32)\n        target[\"kps_weights\"] = torch.as_tensor(kps_weights, dtype=torch.float32)\n\n        return image, target\n"
  },
  {
    "path": "pytorch_keypoint/HRNet/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nfrom tqdm import tqdm\nimport numpy as np\n\nfrom model import HighResolutionNet\nfrom train_utils import EvalCOCOMetric\nfrom my_dataset_coco import CocoKeypoint\nimport transforms\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            mean_s = -1\n        else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = 
iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 10, [\"\"] * 10\n    stats[0], print_list[0] = _summarize(1, maxDets=20)\n    stats[1], print_list[1] = _summarize(1, maxDets=20, iouThr=.5)\n    stats[2], print_list[2] = _summarize(1, maxDets=20, iouThr=.75)\n    stats[3], print_list[3] = _summarize(1, maxDets=20, areaRng='medium')\n    stats[4], print_list[4] = _summarize(1, maxDets=20, areaRng='large')\n    stats[5], print_list[5] = _summarize(0, maxDets=20)\n    stats[6], print_list[6] = _summarize(0, maxDets=20, iouThr=.5)\n    stats[7], print_list[7] = _summarize(0, maxDets=20, iouThr=.75)\n    stats[8], print_list[8] = _summarize(0, maxDets=20, areaRng='medium')\n    stats[9], print_list[9] = _summarize(0, maxDets=20, areaRng='large')\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef save_info(coco_evaluator,\n              save_name: str = \"record_mAP.txt\"):\n    # calculate COCO info for all keypoints\n    coco_stats, print_coco = summarize(coco_evaluator)\n\n    # 将验证结果保存至txt文件中\n    with open(save_name, \"w\") as f:\n        record_lines = [\"COCO results:\", print_coco]\n        f.write(\"\\n\".join(record_lines))\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([\n            transforms.AffineTransform(scale=(1.25, 1.25), fixed_size=args.resize_hw),\n            transforms.ToTensor(),\n            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n        ])\n    }\n\n    # read class_indict\n    label_json_path = args.label_json_path\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with 
open(label_json_path, 'r') as f:\n        person_coco_info = json.load(f)\n\n    data_root = args.data_path\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = CocoKeypoint(data_root, \"val\", transforms=data_transform[\"val\"], det_json_path=None)\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    # val_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"val.txt\", transforms=data_transform[\"val\"])\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     pin_memory=True,\n                                                     num_workers=nw,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model\n    model = HighResolutionNet()\n\n    # 载入你自己训练好的模型权重\n    weights_path = args.weights_path\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location='cpu'))\n    # print(model)\n    model.to(device)\n\n    # evaluate on the val dataset\n    key_metric = EvalCOCOMetric(val_dataset.coco, \"keypoints\", \"key_results.json\")\n    model.eval()\n    with torch.no_grad():\n        for images, targets in tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            images = images.to(device)\n\n            # inference\n            outputs = model(images)\n            if args.flip:\n                flipped_images = transforms.flip_images(images)\n                flipped_outputs = model(flipped_images)\n                
flipped_outputs = transforms.flip_back(flipped_outputs, person_coco_info[\"flip_pairs\"])\n                # feature is not aligned, shift flipped heatmap for higher accuracy\n                # https://github.com/leoxiaobin/deep-high-resolution-net.pytorch/issues/22\n                flipped_outputs[..., 1:] = flipped_outputs.clone()[..., 0:-1]\n                outputs = (outputs + flipped_outputs) * 0.5\n\n            # decode keypoint\n            reverse_trans = [t[\"reverse_trans\"] for t in targets]\n            outputs = transforms.get_final_preds(outputs, reverse_trans, post_processing=True)\n\n            key_metric.update(targets, outputs)\n\n    key_metric.synchronize_results()\n    key_metric.evaluate()\n\n    save_info(key_metric.coco_evaluator, \"keypoint_record_mAP.txt\")\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n\n    parser.add_argument('--resize-hw', type=list, default=[256, 192], help=\"resize for predict\")\n    # 是否开启图像翻转\n    parser.add_argument('--flip', type=bool, default=True, help='whether using flipped images')\n\n    # 数据集的根目录\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights-path', default='./pose_hrnet_w32_256x192.pth', type=str, help='training weights')\n\n    # batch size\n    parser.add_argument('--batch-size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n    # 类别索引和类别名称对应关系\n    parser.add_argument('--label-json-path', type=str, default=\"person_keypoints.json\")\n    # 原项目提供的验证集person检测信息，如果要使用GT信息，直接将该参数置为None\n    parser.add_argument('--person-det', type=str, default=\"./COCO_val2017_detections_AP_H_56_person.json\")\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/README.md",
    "content": "# Faster R-CNN\n\n## 该项目主要是来自pytorch官方torchvision模块中的源码\n* https://github.com/pytorch/vision/tree/master/torchvision/models/detection\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)\n* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n* 详细环境配置见`requirements.txt`\n\n## 文件结构：\n```\n  ├── backbone: 特征提取网络，可以根据自己的要求选择\n  ├── network_files: Faster R-CNN网络（包括Fast R-CNN以及RPN等模块）\n  ├── train_utils: 训练验证相关模块（包括cocotools）\n  ├── my_dataset.py: 自定义dataset用于读取VOC数据集\n  ├── train_mobilenet.py: 以MobileNetV2做为backbone进行训练\n  ├── train_resnet50_fpn.py: 以resnet50+FPN做为backbone进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n  └── pascal_voc_classes.json: pascal_voc标签文件\n```\n\n## 预训练权重下载地址（下载后放入backbone文件夹中）：\n* MobileNetV2 weights(下载后重命名为`mobilenet_v2.pth`，然后放到`backbone`文件夹下): https://download.pytorch.org/models/mobilenet_v2-b0353104.pth\n* Resnet50 weights(下载后重命名为`resnet50.pth`，然后放到`backbone`文件夹下): https://download.pytorch.org/models/resnet50-0676ba61.pth\n* ResNet50+FPN weights: https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\n* 注意，下载的预训练权重记得要重命名，比如在train_resnet50_fpn.py中读取的是`fasterrcnn_resnet50_fpn_coco.pth`文件，\n  不是`fasterrcnn_resnet50_fpn_coco-258fb6c6.pth`，然后放到当前项目根目录下即可。\n \n \n## 数据集，本例程使用的是PASCAL VOC2012数据集\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK\n* 使用ResNet50+FPN以及迁移学习在VOC2012数据集上得到的权重: 链接:https://pan.baidu.com/s/1ifilndFRtAV5RDZINSHj5w 提取码:dsz8\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要训练mobilenetv2+fasterrcnn，直接使用train_mobilenet.py训练脚本\n* 若要训练resnet50+fpn+fasterrcnn，直接使用train_resnet50_fpn.py训练脚本\n* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py`\n\n## 注意事项\n* 在使用训练脚本时，注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录**\n* 由于带有FPN结构的Faster RCNN很吃显存，如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer，\n  即不传递norm_layer变量，默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。\n* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n* 在使用预测脚本时，要将`train_weights`设置为你自己生成的权重路径。\n* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动\n\n## 如果对Faster RCNN原理不是很理解可参考我的bilibili\n* https://b23.tv/sXcBSP\n\n## 进一步了解该项目，以及对Faster RCNN代码的分析可参考我的bilibili\n* https://b23.tv/HvMiDy\n\n## Faster RCNN框架图\n![Faster R-CNN](fasterRCNN.png) \n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/backbone/__init__.py",
    "content": "from .resnet50_fpn_model import resnet50_fpn_backbone\nfrom .mobilenetv2_model import MobileNetV2\nfrom .vgg_model import vgg\nfrom .feature_pyramid_network import LastLevelMaxPool, BackboneWithFPN\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/backbone/feature_pyramid_network.py",
    "content": "from collections import OrderedDict\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom torch.jit.annotations import Tuple, List, Dict\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n    Arguments:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model, return_layers):\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n        layers = OrderedDict()\n\n        # 遍历模型子模块按顺序存入有序字典\n        # 只保存layer4及其之前的结构，舍去之后不用的结构\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super().__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def 
forward(self, x):\n        out = OrderedDict()\n        # 依次遍历模型的所有子模块，并进行正向传播，\n        # 收集layer1, layer2, layer3, layer4的输出\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass FeaturePyramidNetwork(nn.Module):\n    \"\"\"\n    Module that adds a FPN from on top of a set of feature maps. This is based on\n    `\"Feature Pyramid Network for Object Detection\" <https://arxiv.org/abs/1612.03144>`_.\n    The feature maps are currently supposed to be in increasing depth\n    order.\n    The input to the model is expected to be an OrderedDict[Tensor], containing\n    the feature maps on top of which the FPN will be added.\n    Arguments:\n        in_channels_list (list[int]): number of channels for each feature map that\n            is passed to the module\n        out_channels (int): number of channels of the FPN representation\n        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will\n            be performed. 
It is expected to take the fpn features, the original\n            features and the names of the original features as input, and returns\n            a new list of feature maps and their corresponding names\n    \"\"\"\n\n    def __init__(self, in_channels_list, out_channels, extra_blocks=None):\n        super().__init__()\n        # 用来调整resnet特征矩阵(layer1,2,3,4)的channel（kernel_size=1）\n        self.inner_blocks = nn.ModuleList()\n        # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵\n        self.layer_blocks = nn.ModuleList()\n        for in_channels in in_channels_list:\n            if in_channels == 0:\n                continue\n            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)\n            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)\n            self.inner_blocks.append(inner_block_module)\n            self.layer_blocks.append(layer_block_module)\n\n        # initialize parameters now to avoid modifying the initialization of top_blocks\n        for m in self.children():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_uniform_(m.weight, a=1)\n                nn.init.constant_(m.bias, 0)\n\n        self.extra_blocks = extra_blocks\n\n    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.inner_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.inner_blocks)\n        if idx < 0:\n            idx += num_blocks\n        i = 0\n        out = x\n        for module in self.inner_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.layer_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.layer_blocks)\n        if idx < 0:\n          
  idx += num_blocks\n        i = 0\n        out = x\n        for module in self.layer_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:\n        \"\"\"\n        Computes the FPN for a set of feature maps.\n        Arguments:\n            x (OrderedDict[Tensor]): feature maps for each feature level.\n        Returns:\n            results (OrderedDict[Tensor]): feature maps after FPN layers.\n                They are ordered from highest resolution first.\n        \"\"\"\n        # unpack OrderedDict into two lists for easier handling\n        names = list(x.keys())\n        x = list(x.values())\n\n        # 将resnet layer4的channel调整到指定的out_channels\n        # last_inner = self.inner_blocks[-1](x[-1])\n        last_inner = self.get_result_from_inner_blocks(x[-1], -1)\n        # result中保存着每个预测特征层\n        results = []\n        # 将layer4调整channel后的特征矩阵，通过3x3卷积后得到对应的预测特征矩阵\n        # results.append(self.layer_blocks[-1](last_inner))\n        results.append(self.get_result_from_layer_blocks(last_inner, -1))\n\n        for idx in range(len(x) - 2, -1, -1):\n            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)\n            feat_shape = inner_lateral.shape[-2:]\n            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode=\"nearest\")\n            last_inner = inner_lateral + inner_top_down\n            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))\n\n        # 在layer4对应的预测特征层基础上生成预测特征矩阵5\n        if self.extra_blocks is not None:\n            results, names = self.extra_blocks(results, x, names)\n\n        # make it back an OrderedDict\n        out = OrderedDict([(k, v) for k, v in zip(names, results)])\n\n        return out\n\n\nclass LastLevelMaxPool(torch.nn.Module):\n    \"\"\"\n    Applies a max_pool2d on top of the last feature map\n    \"\"\"\n\n    def forward(self, x: 
List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:\n        names.append(\"pool\")\n        x.append(F.max_pool2d(x[-1], 1, 2, 0))  # input, kernel_size, stride, padding\n        return x, names\n\n\nclass BackboneWithFPN(nn.Module):\n    \"\"\"\n    Adds a FPN on top of a model.\n    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to\n    extract a submodel that returns the feature maps specified in return_layers.\n    The same limitations of IntermediatLayerGetter apply here.\n    Arguments:\n        backbone (nn.Module)\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n        in_channels_list (List[int]): number of channels for each feature map\n            that is returned, in the order they are present in the OrderedDict\n        out_channels (int): number of channels in the FPN.\n        extra_blocks: ExtraFPNBlock\n    Attributes:\n        out_channels (int): the number of channels in the FPN\n    \"\"\"\n\n    def __init__(self,\n                 backbone: nn.Module,\n                 return_layers=None,\n                 in_channels_list=None,\n                 out_channels=256,\n                 extra_blocks=None,\n                 re_getter=True):\n        super().__init__()\n\n        if extra_blocks is None:\n            extra_blocks = LastLevelMaxPool()\n\n        if re_getter is True:\n            assert return_layers is not None\n            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)\n        else:\n            self.body = backbone\n\n        self.fpn = FeaturePyramidNetwork(\n            in_channels_list=in_channels_list,\n            out_channels=out_channels,\n            extra_blocks=extra_blocks,\n        )\n\n        
self.out_channels = out_channels\n\n    def forward(self, x):\n        x = self.body(x)\n        x = self.fpn(x)\n        return x\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/backbone/mobilenetv2_model.py",
    "content": "from torch import nn\nimport torch\nfrom torchvision.ops import misc\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(nn.Sequential):\n    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None):\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        super(ConvBNReLU, self).__init__(\n            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),\n            norm_layer(out_channel),\n            nn.ReLU6(inplace=True)\n        )\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio, norm_layer=None):\n        super(InvertedResidual, self).__init__()\n        hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        layers = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1, norm_layer=norm_layer))\n        layers.extend([\n            # 3x3 depthwise conv\n            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel, norm_layer=norm_layer),\n            # 1x1 pointwise conv(linear)\n            nn.Conv2d(hidden_channel, 
out_channel, kernel_size=1, bias=False),\n            norm_layer(out_channel),\n        ])\n\n        self.conv = nn.Sequential(*layers)\n\n    def forward(self, x):\n        if self.use_shortcut:\n            return x + self.conv(x)\n        else:\n            return self.conv(x)\n\n\nclass MobileNetV2(nn.Module):\n    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8, weights_path=None, norm_layer=None):\n        super(MobileNetV2, self).__init__()\n        block = InvertedResidual\n        input_channel = _make_divisible(32 * alpha, round_nearest)\n        last_channel = _make_divisible(1280 * alpha, round_nearest)\n\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n\n        features = []\n        # conv1 layer\n        features.append(ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer))\n        # building inverted residual residual blockes\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = _make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))\n                input_channel = output_channel\n        # building last several layers\n        features.append(ConvBNReLU(input_channel, last_channel, 1, norm_layer=norm_layer))\n        # combine feature layers\n        self.features = nn.Sequential(*features)\n\n        # building classifier\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.classifier = nn.Sequential(\n            nn.Dropout(0.2),\n            nn.Linear(last_channel, num_classes)\n        )\n\n        if 
weights_path is None:\n            # weight initialization\n            for m in self.modules():\n                if isinstance(m, nn.Conv2d):\n                    nn.init.kaiming_normal_(m.weight, mode='fan_out')\n                    if m.bias is not None:\n                        nn.init.zeros_(m.bias)\n                elif isinstance(m, nn.BatchNorm2d):\n                    nn.init.ones_(m.weight)\n                    nn.init.zeros_(m.bias)\n                elif isinstance(m, nn.Linear):\n                    nn.init.normal_(m.weight, 0, 0.01)\n                    nn.init.zeros_(m.bias)\n        else:\n            self.load_state_dict(torch.load(weights_path))\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/backbone/resnet50_fpn_model.py",
    "content": "import os\n\nimport torch\nimport torch.nn as nn\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nfrom .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = norm_layer(out_channel * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n  
      self._norm_layer = norm_layer\n\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = norm_layer(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        norm_layer = self._norm_layer\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                norm_layer(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample,\n                            stride=stride, norm_layer=norm_layer))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        
x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef overwrite_eps(model, eps):\n    \"\"\"\n    This method overwrites the default eps values of all the\n    FrozenBatchNorm2d layers of the model with the provided value.\n    This is necessary to address the BC-breaking change introduced\n    by the bug-fix at pytorch/vision#2933. The overwrite is applied\n    only when the pretrained weights are loaded to maintain compatibility\n    with previous versions.\n\n    Args:\n        model (nn.Module): The model on which we perform the overwrite.\n        eps (float): The new value of eps.\n    \"\"\"\n    for module in model.modules():\n        if isinstance(module, FrozenBatchNorm2d):\n            module.eps = eps\n\n\ndef resnet50_fpn_backbone(pretrain_path=\"\",\n                          norm_layer=FrozenBatchNorm2d,  # FrozenBatchNorm2d的功能与BatchNorm2d类似，但参数无法更新\n                          trainable_layers=3,\n                          returned_layers=None,\n                          extra_blocks=None):\n    \"\"\"\n    搭建resnet50_fpn——backbone\n    Args:\n        pretrain_path: resnet50的预训练权重，如果不使用就默认为空\n        norm_layer: 官方默认的是FrozenBatchNorm2d，即不会更新参数的bn层(因为如果batch_size设置的很小会导致效果更差，还不如不用bn层)\n                    如果自己的GPU显存很大可以设置很大的batch_size，那么自己可以传入正常的BatchNorm2d层\n                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)\n        trainable_layers: 指定训练哪些层结构\n        returned_layers: 指定哪些层的输出需要返回\n        extra_blocks: 在输出的特征层基础上额外添加的层结构\n\n    Returns:\n\n    \"\"\"\n    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],\n                             include_top=False,\n                             norm_layer=norm_layer)\n\n    if isinstance(norm_layer, FrozenBatchNorm2d):\n        overwrite_eps(resnet_backbone, 0.0)\n\n    
if pretrain_path != \"\":\n        assert os.path.exists(pretrain_path), \"{} is not exist.\".format(pretrain_path)\n        # 载入预训练权重\n        print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False))\n\n    # select layers that wont be frozen\n    assert 0 <= trainable_layers <= 5\n    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]\n\n    # 如果要训练所有层结构的话，不要忘了conv1后还有一个bn1\n    if trainable_layers == 5:\n        layers_to_train.append(\"bn1\")\n\n    # freeze layers\n    for name, parameter in resnet_backbone.named_parameters():\n        # 只训练不在layers_to_train列表中的层结构\n        if all([not name.startswith(layer) for layer in layers_to_train]):\n            parameter.requires_grad_(False)\n\n    if extra_blocks is None:\n        extra_blocks = LastLevelMaxPool()\n\n    if returned_layers is None:\n        returned_layers = [1, 2, 3, 4]\n    # 返回的特征层个数肯定大于0小于5\n    assert min(returned_layers) > 0 and max(returned_layers) < 5\n\n    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}\n    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}\n\n    # in_channel 为layer4的输出特征矩阵channel = 2048\n    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256\n    # 记录resnet50提供给fpn的每个特征层channel\n    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]\n    # 通过fpn后得到的每个特征层的channel\n    out_channels = 256\n    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/backbone/vgg_model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass VGG(nn.Module):\n    def __init__(self, features, class_num=1000, init_weights=False, weights_path=None):\n        super(VGG, self).__init__()\n        self.features = features\n        self.classifier = nn.Sequential(\n            nn.Linear(512*7*7, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, class_num)\n        )\n        if init_weights and weights_path is None:\n            self._initialize_weights()\n\n        if weights_path is not None:\n            self.load_state_dict(torch.load(weights_path))\n\n    def forward(self, x):\n        # N x 3 x 224 x 224\n        x = self.features(x)\n        # N x 512 x 7 x 7\n        x = torch.flatten(x, start_dim=1)\n        # N x 512*7*7\n        x = self.classifier(x)\n        return x\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                nn.init.xavier_uniform_(m.weight)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.xavier_uniform_(m.weight)\n                # nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n\n\ndef make_features(cfg: list):\n    layers = []\n    in_channels = 3\n    for v in cfg:\n        if v == \"M\":\n            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]\n        else:\n            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)\n            layers += [conv2d, nn.ReLU(True)]\n            in_channels = v\n    return nn.Sequential(*layers)\n\n\ncfgs = {\n    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg13': [64, 64, 'M', 128, 
128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n}\n\n\ndef vgg(model_name=\"vgg16\", weights_path=None):\n    assert model_name in cfgs, \"Warning: model number {} not in cfgs dict!\".format(model_name)\n    cfg = cfgs[model_name]\n\n    model = VGG(make_features(cfg), weights_path=weights_path)\n    return model\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/change_backbone_with_fpn.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom my_dataset import VOCDataSet\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\nfrom backbone import BackboneWithFPN, LastLevelMaxPool\n\n\ndef create_model(num_classes):\n    import torchvision\n    from torchvision.models.feature_extraction import create_feature_extractor\n\n    # --- mobilenet_v3_large fpn backbone --- #\n    backbone = torchvision.models.mobilenet_v3_large(pretrained=True)\n    # print(backbone)\n    return_layers = {\"features.6\": \"0\",   # stride 8\n                     \"features.12\": \"1\",  # stride 16\n                     \"features.16\": \"2\"}  # stride 32\n    # 提供给fpn的每个特征层channel\n    in_channels_list = [40, 112, 960]\n    new_backbone = create_feature_extractor(backbone, return_layers)\n    # img = torch.randn(1, 3, 224, 224)\n    # outputs = new_backbone(img)\n    # [print(f\"{k} shape: {v.shape}\") for k, v in outputs.items()]\n\n    # --- efficientnet_b0 fpn backbone --- #\n    # backbone = torchvision.models.efficientnet_b0(pretrained=True)\n    # # print(backbone)\n    # return_layers = {\"features.3\": \"0\",  # stride 8\n    #                  \"features.4\": \"1\",  # stride 16\n    #                  \"features.8\": \"2\"}  # stride 32\n    # # 提供给fpn的每个特征层channel\n    # in_channels_list = [40, 80, 1280]\n    # new_backbone = create_feature_extractor(backbone, return_layers)\n    # # img = torch.randn(1, 3, 224, 224)\n    # # outputs = new_backbone(img)\n    # # [print(f\"{k} shape: {v.shape}\") for k, v in outputs.items()]\n\n    backbone_with_fpn = BackboneWithFPN(new_backbone,\n                                        return_layers=return_layers,\n                                        in_channels_list=in_channels_list,\n                                        out_channels=256,\n                           
             extra_blocks=LastLevelMaxPool(),\n                                        re_getter=False)\n\n    anchor_sizes = ((64,), (128,), (256,), (512,))\n    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n    anchor_generator = AnchorsGenerator(sizes=anchor_sizes,\n                                        aspect_ratios=aspect_ratios)\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2'],  # 在哪些特征层上进行RoIAlign pooling\n                                                    output_size=[7, 7],  # RoIAlign pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone_with_fpn,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = 
torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,\n                                                      batch_size=1,\n                                                      shuffle=False,\n                                                      pin_memory=True,\n                               
                       num_workers=nw,\n                                                      collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=args.num_classes + 1)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params,\n                                lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=3,\n                                                   gamma=0.33)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50, warmup=True,\n                                              scaler=scaler)\n        train_loss.append(mean_loss.item())\n        
learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_set_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/resNetFpn-model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    
parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=15, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练的batch size\n    parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/change_backbone_without_fpn.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom my_dataset import VOCDataSet\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\n\n\ndef create_model(num_classes):\n    import torchvision\n    from torchvision.models.feature_extraction import create_feature_extractor\n\n    # vgg16\n    backbone = torchvision.models.vgg16_bn(pretrained=True)\n    # print(backbone)\n    backbone = create_feature_extractor(backbone, return_nodes={\"features.42\": \"0\"})\n    # out = backbone(torch.rand(1, 3, 224, 224))\n    # print(out[\"0\"].shape)\n    backbone.out_channels = 512\n\n    # resnet50 backbone\n    # backbone = torchvision.models.resnet50(pretrained=True)\n    # # print(backbone)\n    # backbone = create_feature_extractor(backbone, return_nodes={\"layer3\": \"0\"})\n    # # out = backbone(torch.rand(1, 3, 224, 224))\n    # # print(out[\"0\"].shape)\n    # backbone.out_channels = 1024\n\n    # EfficientNetB0\n    # backbone = torchvision.models.efficientnet_b0(pretrained=True)\n    # # print(backbone)\n    # backbone = create_feature_extractor(backbone, return_nodes={\"features.5\": \"0\"})\n    # # out = backbone(torch.rand(1, 3, 224, 224))\n    # # print(out[\"0\"].shape)\n    # backbone.out_channels = 112\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行RoIAlign pooling\n                                                    output_size=[7, 7],  # RoIAlign pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n               
        box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        
pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,\n                                                      batch_size=1,\n                                                      shuffle=False,\n                                                      pin_memory=True,\n                                                      num_workers=nw,\n                                                      collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=args.num_classes + 1)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params,\n                                lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=3,\n                        
                           gamma=0.33)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50, warmup=True,\n                                              scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_set_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            
save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/resNetFpn-model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=15, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练的batch size\n    
parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/my_dataset.py",
    "content": "import numpy as np\nfrom torch.utils.data import Dataset\nimport os\nimport torch\nimport json\nfrom PIL import Image\nfrom lxml import etree\n\n\nclass VOCDataSet(Dataset):\n    \"\"\"读取解析PASCAL VOC2007/2012数据集\"\"\"\n\n    def __init__(self, voc_root, year=\"2012\", transforms=None, txt_name: str = \"train.txt\"):\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        # 增加容错能力\n        if \"VOCdevkit\" in voc_root:\n            self.root = os.path.join(voc_root, f\"VOC{year}\")\n        else:\n            self.root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        self.img_root = os.path.join(self.root, \"JPEGImages\")\n        self.annotations_root = os.path.join(self.root, \"Annotations\")\n\n        # read train.txt or val.txt file\n        txt_path = os.path.join(self.root, \"ImageSets\", \"Main\", txt_name)\n        assert os.path.exists(txt_path), \"not found {} file.\".format(txt_name)\n\n        with open(txt_path) as read:\n            xml_list = [os.path.join(self.annotations_root, line.strip() + \".xml\")\n                        for line in read.readlines() if len(line.strip()) > 0]\n\n        self.xml_list = []\n        # check file\n        for xml_path in xml_list:\n            if os.path.exists(xml_path) is False:\n                print(f\"Warning: not found '{xml_path}', skip this annotation file.\")\n                continue\n\n            # check for targets\n            with open(xml_path) as fid:\n                xml_str = fid.read()\n            xml = etree.fromstring(xml_str)\n            data = self.parse_xml_to_dict(xml)[\"annotation\"]\n            if \"object\" not in data:\n                print(f\"INFO: no objects in {xml_path}, skip this annotation file.\")\n                continue\n\n            self.xml_list.append(xml_path)\n\n        assert len(self.xml_list) > 0, \"in '{}' file does not find any information.\".format(txt_path)\n\n        # read class_indict\n   
     json_file = './pascal_voc_classes.json'\n        assert os.path.exists(json_file), \"{} file not exist.\".format(json_file)\n        with open(json_file, 'r') as f:\n            self.class_dict = json.load(f)\n\n        self.transforms = transforms\n\n    def __len__(self):\n        return len(self.xml_list)\n\n    def __getitem__(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        img_path = os.path.join(self.img_root, data[\"filename\"])\n        image = Image.open(img_path)\n        if image.format != \"JPEG\":\n            raise ValueError(\"Image '{}' format not JPEG\".format(img_path))\n\n        boxes = []\n        labels = []\n        iscrowd = []\n        assert \"object\" in data, \"{} lack of object information.\".format(xml_path)\n        for obj in data[\"object\"]:\n            xmin = float(obj[\"bndbox\"][\"xmin\"])\n            xmax = float(obj[\"bndbox\"][\"xmax\"])\n            ymin = float(obj[\"bndbox\"][\"ymin\"])\n            ymax = float(obj[\"bndbox\"][\"ymax\"])\n\n            # 进一步检查数据，有的标注信息中可能有w或h为0的情况，这样的数据会导致计算回归loss为nan\n            if xmax <= xmin or ymax <= ymin:\n                print(\"Warning: in '{}' xml, there are some bbox w/h <=0\".format(xml_path))\n                continue\n            \n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            if \"difficult\" in obj:\n                iscrowd.append(int(obj[\"difficult\"]))\n            else:\n                iscrowd.append(0)\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        if self.transforms is not None:\n            image, target = self.transforms(image, target)\n\n        return image, target\n\n    def get_height_and_width(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        return data_height, data_width\n\n    def parse_xml_to_dict(self, xml):\n        \"\"\"\n        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n        Args:\n            xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n        Returns:\n            Python dictionary holding XML contents.\n        \"\"\"\n\n        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n            return {xml.tag: xml.text}\n\n        result = {}\n        for child in xml:\n            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息\n            if child.tag != 'object':\n                result[child.tag] = child_result[child.tag]\n            else:\n                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                    result[child.tag] = []\n                result[child.tag].append(child_result[child.tag])\n        return {xml.tag: result}\n\n    def coco_index(self, idx):\n        \"\"\"\n        该方法是专门为pycocotools统计标签信息准备，不对图像和标签作任何处理\n        由于不用去读取图片，可大幅缩减统计时间\n\n        Args:\n            idx: 输入需要获取图像的索引\n        \"\"\"\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = 
fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        # img_path = os.path.join(self.img_root, data[\"filename\"])\n        # image = Image.open(img_path)\n        # if image.format != \"JPEG\":\n        #     raise ValueError(\"Image format not JPEG\")\n        boxes = []\n        labels = []\n        iscrowd = []\n        for obj in data[\"object\"]:\n            xmin = float(obj[\"bndbox\"][\"xmin\"])\n            xmax = float(obj[\"bndbox\"][\"xmax\"])\n            ymin = float(obj[\"bndbox\"][\"ymin\"])\n            ymax = float(obj[\"bndbox\"][\"ymax\"])\n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            iscrowd.append(int(obj[\"difficult\"]))\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        return (data_height, data_width), target\n\n    @staticmethod\n    def collate_fn(batch):\n        return tuple(zip(*batch))\n\n# import transforms\n# from draw_box_utils import draw_objs\n# from PIL import Image\n# import json\n# import matplotlib.pyplot as plt\n# import torchvision.transforms as ts\n# import random\n#\n# # read class_indict\n# category_index = {}\n# try:\n#     json_file = open('./pascal_voc_classes.json', 'r')\n#     class_dict = json.load(json_file)\n#     category_index = {str(v): str(k) for k, v 
in class_dict.items()}\n# except Exception as e:\n#     print(e)\n#     exit(-1)\n#\n# data_transform = {\n#     \"train\": transforms.Compose([transforms.ToTensor(),\n#                                  transforms.RandomHorizontalFlip(0.5)]),\n#     \"val\": transforms.Compose([transforms.ToTensor()])\n# }\n#\n# # load train data set\n# train_data_set = VOCDataSet(os.getcwd(), \"2012\", data_transform[\"train\"], \"train.txt\")\n# print(len(train_data_set))\n# for index in random.sample(range(0, len(train_data_set)), k=5):\n#     img, target = train_data_set[index]\n#     img = ts.ToPILImage()(img)\n#     plot_img = draw_objs(img,\n#                          target[\"boxes\"].numpy(),\n#                          target[\"labels\"].numpy(),\n#                          np.ones(target[\"labels\"].shape[0]),\n#                          category_index=category_index,\n#                          box_thresh=0.5,\n#                          line_thickness=3,\n#                          font='arial.ttf',\n#                          font_size=20)\n#     plt.imshow(plot_img)\n#     plt.show()\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/__init__.py",
    "content": "from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor\nfrom .rpn_function import AnchorsGenerator\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/boxes.py",
    "content": "import torch\nfrom typing import Tuple\nfrom torch import Tensor\nimport torchvision\n\n\ndef nms(boxes, scores, iou_threshold):\n    # type: (Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression (NMS) on the boxes according\n    to their intersection-over-union (IoU).\n\n    NMS iteratively removes lower scoring boxes which have an\n    IoU greater than iou_threshold with another (higher scoring)\n    box.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes to perform NMS on. They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    iou_threshold : float\n        discards all overlapping\n        boxes with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices\n        of the elements that have been kept\n        by NMS, sorted in decreasing order of scores\n    \"\"\"\n    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n\n\ndef batched_nms(boxes, scores, idxs, iou_threshold):\n    # type: (Tensor, Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression in a batched fashion.\n\n    Each index value corresponds to a category, and NMS\n    will not be applied between elements of different categories.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes where NMS will be performed. 
They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    idxs : Tensor[N]\n        indices of the categories for each one of the boxes.\n    iou_threshold : float\n        discards all overlapping boxes\n        with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices of\n        the elements that have been kept by NMS, sorted\n        in decreasing order of scores\n    \"\"\"\n    if boxes.numel() == 0:\n        return torch.empty((0,), dtype=torch.int64, device=boxes.device)\n\n    # strategy: in order to perform NMS independently per class.\n    # we add an offset to all the boxes. The offset is dependent\n    # only on the class idx, and is large enough so that boxes\n    # from different classes do not overlap\n    # 获取所有boxes中最大的坐标值（xmin, ymin, xmax, ymax）\n    max_coordinate = boxes.max()\n\n    # to(): Performs Tensor dtype and/or device conversion\n    # 为每一个类别/每一层生成一个很大的偏移量\n    # 这里的to只是让生成tensor的dtype和device与boxes保持一致\n    offsets = idxs.to(boxes) * (max_coordinate + 1)\n    # boxes加上对应层的偏移量后，保证不同类别/层之间boxes不会有重合的现象\n    boxes_for_nms = boxes + offsets[:, None]\n    keep = nms(boxes_for_nms, scores, iou_threshold)\n    return keep\n\n\ndef remove_small_boxes(boxes, min_size):\n    # type: (Tensor, float) -> Tensor\n    \"\"\"\n    Remove boxes which contain at least one side smaller than min_size.\n    移除宽高小于指定阈值的索引\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        min_size (float): minimum size\n\n    Returns:\n        keep (Tensor[K]): indices of the boxes that have both sides\n            larger than or equal to min_size\n    \"\"\"\n    ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # 预测boxes的宽和高\n    # keep = (ws >= min_size) & (hs >= min_size)  # 当满足宽，高都大于给定阈值时为True\n    keep = torch.logical_and(torch.ge(ws, min_size), torch.ge(hs, min_size))\n    # nonzero(): Returns a tensor 
containing the indices of all non-zero elements of input\n    # keep = keep.nonzero().squeeze(1)\n    keep = torch.where(keep)[0]\n    return keep\n\n\ndef clip_boxes_to_image(boxes, size):\n    # type: (Tensor, Tuple[int, int]) -> Tensor\n    \"\"\"\n    Clip boxes so that they lie inside an image of size `size`.\n    裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        size (Tuple[height, width]): size of the image\n\n    Returns:\n        clipped_boxes (Tensor[N, 4])\n    \"\"\"\n    dim = boxes.dim()\n    boxes_x = boxes[..., 0::2]  # x1, x2\n    boxes_y = boxes[..., 1::2]  # y1, y2\n    height, width = size\n\n    if torchvision._is_tracing():\n        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))\n    else:\n        boxes_x = boxes_x.clamp(min=0, max=width)   # 限制x坐标范围在[0,width]之间\n        boxes_y = boxes_y.clamp(min=0, max=height)  # 限制y坐标范围在[0,height]之间\n\n    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)\n    return clipped_boxes.reshape(boxes.shape)\n\n\ndef box_area(boxes):\n    \"\"\"\n    Computes the area of a set of bounding boxes, which are specified by its\n    (x1, y1, x2, y2) coordinates.\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes for which the area will be computed. 
They\n            are expected to be in (x1, y1, x2, y2) format\n\n    Returns:\n        area (Tensor[N]): area for each box\n    \"\"\"\n    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n\n\ndef box_iou(boxes1, boxes2):\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n\n    Arguments:\n        boxes1 (Tensor[N, 4])\n        boxes2 (Tensor[M, 4])\n\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n    area1 = box_area(boxes1)\n    area2 = box_area(boxes2)\n\n    #  When the shapes do not match,\n    #  the shape of the returned output tensor follows the broadcasting rules\n    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # left-top [N,M,2]\n    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # right-bottom [N,M,2]\n\n    wh = (rb - lt).clamp(min=0)  # [N,M,2]\n    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]\n\n    iou = inter / (area1[:, None] + area2 - inter)\n    return iou\n\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/det_utils.py",
    "content": "import torch\nimport math\nfrom typing import List, Tuple\nfrom torch import Tensor\n\n\nclass BalancedPositiveNegativeSampler(object):\n    \"\"\"\n    This class samples batches, ensuring that they contain a fixed proportion of positives\n    \"\"\"\n\n    def __init__(self, batch_size_per_image, positive_fraction):\n        # type: (int, float) -> None\n        \"\"\"\n        Arguments:\n            batch_size_per_image (int): number of elements to be selected per image\n            positive_fraction (float): percentage of positive elements per batch\n        \"\"\"\n        self.batch_size_per_image = batch_size_per_image\n        self.positive_fraction = positive_fraction\n\n    def __call__(self, matched_idxs):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        Arguments:\n            matched idxs: list of tensors containing -1, 0 or positive values.\n                Each tensor corresponds to a specific image.\n                -1 values are ignored, 0 are considered as negatives and > 0 as\n                positives.\n\n        Returns:\n            pos_idx (list[tensor])\n            neg_idx (list[tensor])\n\n        Returns two lists of binary masks for each image.\n        The first list contains the positive elements that were selected,\n        and the second list the negative example.\n        \"\"\"\n        pos_idx = []\n        neg_idx = []\n        # 遍历每张图像的matched_idxs\n        for matched_idxs_per_image in matched_idxs:\n            # >= 1的为正样本, nonzero返回非零元素索引\n            # positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)\n            positive = torch.where(torch.ge(matched_idxs_per_image, 1))[0]\n            # = 0的为负样本\n            # negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)\n            negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]\n\n            # 指定正样本的数量\n            num_pos = int(self.batch_size_per_image * 
self.positive_fraction)\n            # protect against not enough positive examples\n            # 如果正样本数量不够就直接采用所有正样本\n            num_pos = min(positive.numel(), num_pos)\n            # 指定负样本数量\n            num_neg = self.batch_size_per_image - num_pos\n            # protect against not enough negative examples\n            # 如果负样本数量不够就直接采用所有负样本\n            num_neg = min(negative.numel(), num_neg)\n\n            # randomly select positive and negative examples\n            # Returns a random permutation of integers from 0 to n - 1.\n            # 随机选择指定数量的正负样本\n            perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]\n            perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]\n\n            pos_idx_per_image = positive[perm1]\n            neg_idx_per_image = negative[perm2]\n\n            # create binary mask from indices\n            pos_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n            neg_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n\n            pos_idx_per_image_mask[pos_idx_per_image] = 1\n            neg_idx_per_image_mask[neg_idx_per_image] = 1\n\n            pos_idx.append(pos_idx_per_image_mask)\n            neg_idx.append(neg_idx_per_image_mask)\n\n        return pos_idx, neg_idx\n\n\n@torch.jit._script_if_tracing\ndef encode_boxes(reference_boxes, proposals, weights):\n    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n    \"\"\"\n    Encode a set of proposals with respect to some\n    reference boxes\n\n    Arguments:\n        reference_boxes (Tensor): reference boxes(gt)\n        proposals (Tensor): boxes to be encoded(anchors)\n        weights:\n    \"\"\"\n\n    # perform some unpacking to make it JIT-fusion friendly\n    wx = weights[0]\n    wy = weights[1]\n    ww = weights[2]\n    wh = weights[3]\n\n    # unsqueeze()\n    # 
Returns a new tensor with a dimension of size one inserted at the specified position.\n    proposals_x1 = proposals[:, 0].unsqueeze(1)\n    proposals_y1 = proposals[:, 1].unsqueeze(1)\n    proposals_x2 = proposals[:, 2].unsqueeze(1)\n    proposals_y2 = proposals[:, 3].unsqueeze(1)\n\n    reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)\n    reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)\n    reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)\n    reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)\n\n    # implementation starts here\n    # parse widths and heights\n    ex_widths = proposals_x2 - proposals_x1\n    ex_heights = proposals_y2 - proposals_y1\n    # parse coordinate of center point\n    ex_ctr_x = proposals_x1 + 0.5 * ex_widths\n    ex_ctr_y = proposals_y1 + 0.5 * ex_heights\n\n    gt_widths = reference_boxes_x2 - reference_boxes_x1\n    gt_heights = reference_boxes_y2 - reference_boxes_y1\n    gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths\n    gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights\n\n    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths\n    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights\n    targets_dw = ww * torch.log(gt_widths / ex_widths)\n    targets_dh = wh * torch.log(gt_heights / ex_heights)\n\n    targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)\n    return targets\n\n\nclass BoxCoder(object):\n    \"\"\"\n    This class encodes and decodes a set of bounding boxes into\n    the representation used for training the regressors.\n    \"\"\"\n\n    def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)):\n        # type: (Tuple[float, float, float, float], float) -> None\n        \"\"\"\n        Arguments:\n            weights (4-element tuple)\n            bbox_xform_clip (float)\n        \"\"\"\n        self.weights = weights\n        self.bbox_xform_clip = bbox_xform_clip\n\n    def encode(self, reference_boxes, proposals):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        结合anchors和与之对应的gt计算regression参数\n        Args:\n            reference_boxes: List[Tensor] 每个proposal/anchor对应的gt_boxes\n            proposals: List[Tensor] anchors/proposals\n\n        Returns: regression parameters\n\n        \"\"\"\n        # 统计每张图像的anchors个数，方便后面拼接在一起处理后在分开\n        # reference_boxes和proposal数据结构相同\n        boxes_per_image = [len(b) for b in reference_boxes]\n        reference_boxes = torch.cat(reference_boxes, dim=0)\n        proposals = torch.cat(proposals, dim=0)\n\n        # targets_dx, targets_dy, targets_dw, targets_dh\n        targets = self.encode_single(reference_boxes, proposals)\n        return targets.split(boxes_per_image, 0)\n\n    def encode_single(self, reference_boxes, proposals):\n        \"\"\"\n        Encode a set of proposals with respect to some\n        reference boxes\n\n        Arguments:\n            reference_boxes (Tensor): reference boxes\n            proposals (Tensor): boxes to be encoded\n        \"\"\"\n        dtype = reference_boxes.dtype\n        device = reference_boxes.device\n        weights = torch.as_tensor(self.weights, dtype=dtype, device=device)\n        targets = encode_boxes(reference_boxes, proposals, weights)\n\n        return targets\n\n    def decode(self, rel_codes, boxes):\n        # type: (Tensor, List[Tensor]) -> Tensor\n        \"\"\"\n\n        Args:\n            rel_codes: bbox regression parameters\n            boxes: anchors/proposals\n\n        Returns:\n\n        \"\"\"\n        assert isinstance(boxes, (list, tuple))\n        assert isinstance(rel_codes, 
torch.Tensor)\n        boxes_per_image = [b.size(0) for b in boxes]\n        concat_boxes = torch.cat(boxes, dim=0)\n\n        box_sum = 0\n        for val in boxes_per_image:\n            box_sum += val\n\n        # 将预测的bbox回归参数应用到对应anchors上得到预测bbox的坐标\n        pred_boxes = self.decode_single(\n            rel_codes, concat_boxes\n        )\n\n        # 防止pred_boxes为空时导致reshape报错\n        if box_sum > 0:\n            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)\n\n        return pred_boxes\n\n    def decode_single(self, rel_codes, boxes):\n        \"\"\"\n        From a set of original boxes and encoded relative box offsets,\n        get the decoded boxes.\n\n        Arguments:\n            rel_codes (Tensor): encoded boxes (bbox regression parameters)\n            boxes (Tensor): reference boxes (anchors/proposals)\n        \"\"\"\n        boxes = boxes.to(rel_codes.dtype)\n\n        # xmin, ymin, xmax, ymax\n        widths = boxes[:, 2] - boxes[:, 0]   # anchor/proposal宽度\n        heights = boxes[:, 3] - boxes[:, 1]  # anchor/proposal高度\n        ctr_x = boxes[:, 0] + 0.5 * widths   # anchor/proposal中心x坐标\n        ctr_y = boxes[:, 1] + 0.5 * heights  # anchor/proposal中心y坐标\n\n        wx, wy, ww, wh = self.weights  # RPN中为[1,1,1,1], fastrcnn中为[10,10,5,5]\n        dx = rel_codes[:, 0::4] / wx   # 预测anchors/proposals的中心坐标x回归参数\n        dy = rel_codes[:, 1::4] / wy   # 预测anchors/proposals的中心坐标y回归参数\n        dw = rel_codes[:, 2::4] / ww   # 预测anchors/proposals的宽度回归参数\n        dh = rel_codes[:, 3::4] / wh   # 预测anchors/proposals的高度回归参数\n\n        # limit max value, prevent sending too large values into torch.exp()\n        # self.bbox_xform_clip=math.log(1000. 
/ 16)   4.135\n        dw = torch.clamp(dw, max=self.bbox_xform_clip)\n        dh = torch.clamp(dh, max=self.bbox_xform_clip)\n\n        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]\n        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]\n        pred_w = torch.exp(dw) * widths[:, None]\n        pred_h = torch.exp(dh) * heights[:, None]\n\n        # xmin\n        pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymin\n        pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n        # xmax\n        pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymax\n        pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n\n        pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)\n        return pred_boxes\n\n\nclass Matcher(object):\n    BELOW_LOW_THRESHOLD = -1\n    BETWEEN_THRESHOLDS = -2\n\n    __annotations__ = {\n        'BELOW_LOW_THRESHOLD': int,\n        'BETWEEN_THRESHOLDS': int,\n    }\n\n    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):\n        # type: (float, float, bool) -> None\n        \"\"\"\n        Args:\n            high_threshold (float): quality values greater than or equal to\n                this value are candidate matches.\n            low_threshold (float): a lower quality threshold used to stratify\n                matches into three levels:\n                1) matches >= high_threshold\n                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)\n                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)\n            allow_low_quality_matches (bool): if True, produce additional matches\n                for predictions that have only low-quality match candidates. 
See\n                set_low_quality_matches_ for more details.\n        \"\"\"\n        self.BELOW_LOW_THRESHOLD = -1\n        self.BETWEEN_THRESHOLDS = -2\n        assert low_threshold <= high_threshold\n        self.high_threshold = high_threshold  # 0.7\n        self.low_threshold = low_threshold    # 0.3\n        self.allow_low_quality_matches = allow_low_quality_matches\n\n    def __call__(self, match_quality_matrix):\n        \"\"\"\n        计算anchors与每个gtboxes匹配的iou最大值，并记录索引，\n        iou<low_threshold索引值为-1， low_threshold<=iou<high_threshold索引值为-2\n        Args:\n            match_quality_matrix (Tensor[float]): an MxN tensor, containing the\n            pairwise quality between M ground-truth elements and N predicted elements.\n\n        Returns:\n            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in\n            [0, M - 1] or a negative value indicating that prediction i could not\n            be matched.\n        \"\"\"\n        if match_quality_matrix.numel() == 0:\n            # empty targets or proposals not supported during training\n            if match_quality_matrix.shape[0] == 0:\n                raise ValueError(\n                    \"No ground-truth boxes available for one of the images \"\n                    \"during training\")\n            else:\n                raise ValueError(\n                    \"No proposal boxes available for one of the images \"\n                    \"during training\")\n\n        # match_quality_matrix is M (gt) x N (predicted)\n        # Max over gt elements (dim 0) to find best gt candidate for each prediction\n        # M x N 的每一列代表一个anchors与所有gt的匹配iou值\n        # matched_vals代表每列的最大值，即每个anchors与所有gt匹配的最大iou值\n        # matches对应最大值所在的索引\n        matched_vals, matches = match_quality_matrix.max(dim=0)  # the dimension to reduce.\n        if self.allow_low_quality_matches:\n            all_matches = matches.clone()\n        else:\n            all_matches = None\n\n        # Assign 
candidate matches with low quality to negative (unassigned) values\n        # 计算iou小于low_threshold的索引\n        below_low_threshold = matched_vals < self.low_threshold\n        # 计算iou在low_threshold与high_threshold之间的索引值\n        between_thresholds = (matched_vals >= self.low_threshold) & (\n            matched_vals < self.high_threshold\n        )\n        # iou小于low_threshold的matches索引置为-1\n        matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD  # -1\n\n        # iou在[low_threshold, high_threshold]之间的matches索引置为-2\n        matches[between_thresholds] = self.BETWEEN_THRESHOLDS    # -2\n\n        if self.allow_low_quality_matches:\n            assert all_matches is not None\n            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)\n\n        return matches\n\n    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):\n        \"\"\"\n        Produce additional matches for predictions that have only low-quality matches.\n        Specifically, for each ground-truth find the set of predictions that have\n        maximum overlap with it (including ties); for each prediction in that set, if\n        it is unmatched, then match it to the ground-truth with which it has the highest\n        quality value.\n        \"\"\"\n        # For each gt, find the prediction with which it has highest quality\n        # 对于每个gt boxes寻找与其iou最大的anchor，\n        # highest_quality_foreach_gt为匹配到的最大iou值\n        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)  # the dimension to reduce.\n\n        # Find highest quality match available, even if it is low, including ties\n        # 寻找每个gt boxes与其iou最大的anchor索引，一个gt匹配到的最大iou可能有多个anchor\n        # gt_pred_pairs_of_highest_quality = torch.nonzero(\n        #     match_quality_matrix == highest_quality_foreach_gt[:, None]\n        # )\n        gt_pred_pairs_of_highest_quality = torch.where(\n            torch.eq(match_quality_matrix, highest_quality_foreach_gt[:, 
None])\n        )\n        # Example gt_pred_pairs_of_highest_quality:\n        #   tensor([[    0, 39796],\n        #           [    1, 32055],\n        #           [    1, 32070],\n        #           [    2, 39190],\n        #           [    2, 40255],\n        #           [    3, 40390],\n        #           [    3, 41455],\n        #           [    4, 45470],\n        #           [    5, 45325],\n        #           [    5, 46390]])\n        # Each row is a (gt index, prediction index)\n        # Note how gt items 1, 2, 3, and 5 each have two ties\n\n        # gt_pred_pairs_of_highest_quality[:, 0]代表是对应的gt index(不需要)\n        # pre_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]\n        pre_inds_to_update = gt_pred_pairs_of_highest_quality[1]\n        # 保留该anchor匹配gt最大iou的索引，即使iou低于设定的阈值\n        matches[pre_inds_to_update] = all_matches[pre_inds_to_update]\n\n\ndef smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):\n    \"\"\"\n    very similar to the smooth_l1_loss from pytorch, but with\n    the extra beta parameter\n    \"\"\"\n    n = torch.abs(input - target)\n    # cond = n < beta\n    cond = torch.lt(n, beta)\n    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)\n    if size_average:\n        return loss.mean()\n    return loss.sum()\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/faster_rcnn_framework.py",
    "content": "import warnings\nfrom collections import OrderedDict\nfrom typing import Tuple, List, Dict, Optional, Union\n\nimport torch\nfrom torch import nn, Tensor\nimport torch.nn.functional as F\nfrom torchvision.ops import MultiScaleRoIAlign\n\nfrom .roi_head import RoIHeads\nfrom .transform import GeneralizedRCNNTransform\nfrom .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork\n\n\nclass FasterRCNNBase(nn.Module):\n    \"\"\"\n    Main class for Generalized R-CNN.\n\n    Arguments:\n        backbone (nn.Module):\n        rpn (nn.Module):\n        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes\n            detections / masks from it.\n        transform (nn.Module): performs the data transformation from the inputs to feed into\n            the model\n    \"\"\"\n\n    def __init__(self, backbone, rpn, roi_heads, transform):\n        super(FasterRCNNBase, self).__init__()\n        self.transform = transform\n        self.backbone = backbone\n        self.rpn = rpn\n        self.roi_heads = roi_heads\n        # used only on torchscript mode\n        self._has_warned = False\n\n    @torch.jit.unused\n    def eager_outputs(self, losses, detections):\n        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        if self.training:\n            return losses\n\n        return detections\n\n    def forward(self, images, targets=None):\n        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        \"\"\"\n        Arguments:\n            images (list[Tensor]): images to be processed\n            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)\n\n        Returns:\n            result (list[BoxList] or dict[Tensor]): the output from the model.\n                During training, it returns a dict[Tensor] which contains the losses.\n                During 
testing, it returns list[BoxList] contains additional fields\n                like `scores`, `labels` and `mask` (for Mask R-CNN models).\n\n        \"\"\"\n        if self.training and targets is None:\n            raise ValueError(\"In training mode, targets should be passed\")\n\n        if self.training:\n            assert targets is not None\n            for target in targets:         # 进一步判断传入的target的boxes参数是否符合规定\n                boxes = target[\"boxes\"]\n                if isinstance(boxes, torch.Tensor):\n                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:\n                        raise ValueError(\"Expected target boxes to be a tensor\"\n                                         \"of shape [N, 4], got {:}.\".format(\n                                          boxes.shape))\n                else:\n                    raise ValueError(\"Expected target boxes to be of type \"\n                                     \"Tensor, got {:}.\".format(type(boxes)))\n\n        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])\n        for img in images:\n            val = img.shape[-2:]\n            assert len(val) == 2  # 防止输入的是个一维向量\n            original_image_sizes.append((val[0], val[1]))\n        # original_image_sizes = [img.shape[-2:] for img in images]\n\n        images, targets = self.transform(images, targets)  # 对图像进行预处理\n\n        # print(images.tensors.shape)\n        features = self.backbone(images.tensors)  # 将图像输入backbone得到特征图\n        if isinstance(features, torch.Tensor):  # 若只在一层特征层上预测，将feature放入有序字典中，并编号为‘0’\n            features = OrderedDict([('0', features)])  # 若在多层特征层上预测，传入的就是一个有序字典\n\n        # 将特征层以及标注target信息传入rpn中\n        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],\n        # 每个proposals是绝对坐标，且为(x1, y1, x2, y2)格式\n        proposals, proposal_losses = self.rpn(images, features, targets)\n\n        # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分\n        detections, detector_losses = 
self.roi_heads(features, proposals, images.image_sizes, targets)\n\n        # 对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)\n\n        losses = {}\n        losses.update(detector_losses)\n        losses.update(proposal_losses)\n\n        if torch.jit.is_scripting():\n            if not self._has_warned:\n                warnings.warn(\"RCNN always returns a (Losses, Detections) tuple in scripting\")\n                self._has_warned = True\n            return losses, detections\n        else:\n            return self.eager_outputs(losses, detections)\n\n        # if self.training:\n        #     return losses\n        #\n        # return detections\n\n\nclass TwoMLPHead(nn.Module):\n    \"\"\"\n    Standard heads for FPN-based models\n\n    Arguments:\n        in_channels (int): number of input channels\n        representation_size (int): size of the intermediate representation\n    \"\"\"\n\n    def __init__(self, in_channels, representation_size):\n        super(TwoMLPHead, self).__init__()\n\n        self.fc6 = nn.Linear(in_channels, representation_size)\n        self.fc7 = nn.Linear(representation_size, representation_size)\n\n    def forward(self, x):\n        x = x.flatten(start_dim=1)\n\n        x = F.relu(self.fc6(x))\n        x = F.relu(self.fc7(x))\n\n        return x\n\n\nclass FastRCNNPredictor(nn.Module):\n    \"\"\"\n    Standard classification + bounding box regression layers\n    for Fast R-CNN.\n\n    Arguments:\n        in_channels (int): number of input channels\n        num_classes (int): number of output classes (including background)\n    \"\"\"\n\n    def __init__(self, in_channels, num_classes):\n        super(FastRCNNPredictor, self).__init__()\n        self.cls_score = nn.Linear(in_channels, num_classes)\n        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)\n\n    def forward(self, x):\n        if x.dim() == 4:\n            assert 
list(x.shape[2:]) == [1, 1]\n        x = x.flatten(start_dim=1)\n        scores = self.cls_score(x)\n        bbox_deltas = self.bbox_pred(x)\n\n        return scores, bbox_deltas\n\n\nclass FasterRCNN(FasterRCNNBase):\n    \"\"\"\n    Implements Faster R-CNN.\n\n    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each\n    image, and should be in 0-1 range. Different images can have different sizes.\n\n    The behavior of the model changes depending if it is in training or evaluation mode.\n\n    During training, the model expects both the input tensors, as well as a targets (list of dictionary),\n    containing:\n        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values\n          between 0 and H and 0 and W\n        - labels (Int64Tensor[N]): the class label for each ground-truth box\n\n    The model returns a Dict[Tensor] during training, containing the classification and regression\n    losses for both the RPN and the R-CNN.\n\n    During inference, the model requires only the input tensors, and returns the post-processed\n    predictions as a List[Dict[Tensor]], one for each input image. 
The fields of the Dict are as\n    follows:\n        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between\n          0 and H and 0 and W\n        - labels (Int64Tensor[N]): the predicted labels for each image\n        - scores (Tensor[N]): the scores or each prediction\n\n    Arguments:\n        backbone (nn.Module): the network used to compute the features for the model.\n            It should contain a out_channels attribute, which indicates the number of output\n            channels that each feature map has (and it should be the same for all feature maps).\n            The backbone should return a single Tensor or and OrderedDict[Tensor].\n        num_classes (int): number of output classes of the model (including the background).\n            If box_predictor is specified, num_classes should be None.\n        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone\n        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone\n        image_mean (Tuple[float, float, float]): mean values used for input normalization.\n            They are generally the mean values of the dataset on which the backbone has been trained\n            on\n        image_std (Tuple[float, float, float]): std values used for input normalization.\n            They are generally the std values of the dataset on which the backbone has been trained on\n        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN\n        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training\n        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing\n        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during 
training\n        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing\n        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        rpn_score_thresh (float): during inference, only return proposals with a classification score\n            greater than rpn_score_thresh\n        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in\n            the locations indicated by the bounding boxes\n        box_head (nn.Module): module that takes the cropped feature maps as input\n        box_predictor (nn.Module): module that takes the output of box_head and returns the\n            classification logits and box regression deltas.\n        box_score_thresh (float): during inference, only return proposals with a classification score\n            greater than box_score_thresh\n        box_nms_thresh (float): NMS threshold for the prediction head. 
Used during inference\n        box_detections_per_img (int): maximum number of detections per image, for all classes.\n        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be\n            considered as positive during training of the classification head\n        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be\n            considered as negative during training of the classification head\n        box_batch_size_per_image (int): number of proposals that are sampled during training of the\n            classification head\n        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training\n            of the classification head\n        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the\n            bounding boxes\n\n    \"\"\"\n\n    def __init__(self, backbone, num_classes=None,\n                 # transform parameter\n                 min_size=800, max_size=1333,      # 预处理resize时限制的最小尺寸与最大尺寸\n                 image_mean=None, image_std=None,  # 预处理normalize时使用的均值和方差\n                 # RPN parameters\n                 rpn_anchor_generator=None, rpn_head=None,\n                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # rpn中在nms处理前保留的proposal数(根据score)\n                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # rpn中在nms处理后保留的proposal数\n                 rpn_nms_thresh=0.7,  # rpn中进行nms处理时使用的iou阈值\n                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # rpn计算损失时，采集正负样本设置的阈值\n                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # rpn计算损失时采样的样本数，以及正样本占总样本的比例\n                 rpn_score_thresh=0.0,\n                 # Box parameters\n                 box_roi_pool=None, box_head=None, box_predictor=None,\n                 # 移除低目标概率      fast rcnn中进行nms处理的阈值   对预测结果根据score排序取前100个目标\n                 box_score_thresh=0.05, 
box_nms_thresh=0.5, box_detections_per_img=100,\n                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # fast rcnn计算误差时，采集正负样本设置的阈值\n                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # fast rcnn计算误差时采样的样本数，以及正样本占所有样本的比例\n                 bbox_reg_weights=None):\n        if not hasattr(backbone, \"out_channels\"):\n            raise ValueError(\n                \"backbone should contain an attribute out_channels\"\n                \"specifying the number of output channels  (assumed to be the\"\n                \"same for all the levels\"\n            )\n\n        assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))\n        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))\n\n        if num_classes is not None:\n            if box_predictor is not None:\n                raise ValueError(\"num_classes should be None when box_predictor \"\n                                 \"is specified\")\n        else:\n            if box_predictor is None:\n                raise ValueError(\"num_classes should not be None when box_predictor \"\n                                 \"is not specified\")\n\n        # 预测特征层的channels\n        out_channels = backbone.out_channels\n\n        # 若anchor生成器为空，则自动生成针对resnet50_fpn的anchor生成器\n        if rpn_anchor_generator is None:\n            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))\n            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n            rpn_anchor_generator = AnchorsGenerator(\n                anchor_sizes, aspect_ratios\n            )\n\n        # 生成RPN通过滑动窗口预测网络部分\n        if rpn_head is None:\n            rpn_head = RPNHead(\n                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]\n            )\n\n        # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,\n        # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000,\n        rpn_pre_nms_top_n = 
dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)\n        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)\n\n        # 定义整个RPN框架\n        rpn = RegionProposalNetwork(\n            rpn_anchor_generator, rpn_head,\n            rpn_fg_iou_thresh, rpn_bg_iou_thresh,\n            rpn_batch_size_per_image, rpn_positive_fraction,\n            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,\n            score_thresh=rpn_score_thresh)\n\n        #  Multi-scale RoIAlign pooling\n        if box_roi_pool is None:\n            box_roi_pool = MultiScaleRoIAlign(\n                featmap_names=['0', '1', '2', '3'],  # 在哪些特征层进行roi pooling\n                output_size=[7, 7],\n                sampling_ratio=2)\n\n        # fast RCNN中roi pooling后的展平处理两个全连接层部分\n        if box_head is None:\n            resolution = box_roi_pool.output_size[0]  # 默认等于7\n            representation_size = 1024\n            box_head = TwoMLPHead(\n                out_channels * resolution ** 2,\n                representation_size\n            )\n\n        # 在box_head的输出上预测部分\n        if box_predictor is None:\n            representation_size = 1024\n            box_predictor = FastRCNNPredictor(\n                representation_size,\n                num_classes)\n\n        # 将roi pooling, box_head以及box_predictor结合在一起\n        roi_heads = RoIHeads(\n            # box\n            box_roi_pool, box_head, box_predictor,\n            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5\n            box_batch_size_per_image, box_positive_fraction,  # 512  0.25\n            bbox_reg_weights,\n            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100\n\n        if image_mean is None:\n            image_mean = [0.485, 0.456, 0.406]\n        if image_std is None:\n            image_std = [0.229, 0.224, 0.225]\n\n        # 对数据进行标准化，缩放，打包成batch等处理部分\n        transform = GeneralizedRCNNTransform(min_size, max_size, 
image_mean, image_std)\n\n        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/image_list.py",
    "content": "from typing import List, Tuple\nfrom torch import Tensor\n\n\nclass ImageList(object):\n    \"\"\"\n    Structure that holds a list of images (of possibly\n    varying sizes) as a single tensor.\n    This works by padding the images to the same size,\n    and storing in a field the original sizes of each image\n    \"\"\"\n\n    def __init__(self, tensors, image_sizes):\n        # type: (Tensor, List[Tuple[int, int]]) -> None\n        \"\"\"\n        Arguments:\n            tensors (tensor) padding后的图像数据\n            image_sizes (list[tuple[int, int]])  padding前的图像尺寸\n        \"\"\"\n        self.tensors = tensors\n        self.image_sizes = image_sizes\n\n    def to(self, device):\n        # type: (Device) -> ImageList # noqa\n        cast_tensor = self.tensors.to(device)\n        return ImageList(cast_tensor, self.image_sizes)\n\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/roi_head.py",
    "content": "from typing import Optional, List, Dict, Tuple\n\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom . import det_utils\nfrom . import boxes as box_ops\n\n\ndef fastrcnn_loss(class_logits, box_regression, labels, regression_targets):\n    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    Computes the loss for Faster R-CNN.\n\n    Arguments:\n        class_logits : 预测类别概率信息，shape=[num_anchors, num_classes]\n        box_regression : 预测边目标界框回归信息\n        labels : 真实类别信息\n        regression_targets : 真实目标边界框信息\n\n    Returns:\n        classification_loss (Tensor)\n        box_loss (Tensor)\n    \"\"\"\n\n    labels = torch.cat(labels, dim=0)\n    regression_targets = torch.cat(regression_targets, dim=0)\n\n    # 计算类别损失信息\n    classification_loss = F.cross_entropy(class_logits, labels)\n\n    # get indices that correspond to the regression targets for\n    # the corresponding ground truth labels, to be used with\n    # advanced indexing\n    # 返回标签类别大于0的索引\n    # sampled_pos_inds_subset = torch.nonzero(torch.gt(labels, 0)).squeeze(1)\n    sampled_pos_inds_subset = torch.where(torch.gt(labels, 0))[0]\n\n    # 返回标签类别大于0位置的类别信息\n    labels_pos = labels[sampled_pos_inds_subset]\n\n    # shape=[num_proposal, num_classes]\n    N, num_classes = class_logits.shape\n    box_regression = box_regression.reshape(N, -1, 4)\n\n    # 计算边界框损失信息\n    box_loss = det_utils.smooth_l1_loss(\n        # 获取指定索引proposal的指定类别box信息\n        box_regression[sampled_pos_inds_subset, labels_pos],\n        regression_targets[sampled_pos_inds_subset],\n        beta=1 / 9,\n        size_average=False,\n    ) / labels.numel()\n\n    return classification_loss, box_loss\n\n\nclass RoIHeads(torch.nn.Module):\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n    }\n\n    def 
__init__(self,\n                 box_roi_pool,   # Multi-scale RoIAlign pooling\n                 box_head,       # TwoMLPHead\n                 box_predictor,  # FastRCNNPredictor\n                 # Faster R-CNN training\n                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5\n                 batch_size_per_image, positive_fraction,  # default: 512, 0.25\n                 bbox_reg_weights,  # None\n                 # Faster R-CNN inference\n                 score_thresh,        # default: 0.05\n                 nms_thresh,          # default: 0.5\n                 detection_per_img):  # default: 100\n        super(RoIHeads, self).__init__()\n\n        self.box_similarity = box_ops.box_iou\n        # assign ground-truth boxes for each proposal\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # default: 0.5\n            bg_iou_thresh,  # default: 0.5\n            allow_low_quality_matches=False)\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image,  # default: 512\n            positive_fraction)     # default: 0.25\n\n        if bbox_reg_weights is None:\n            bbox_reg_weights = (10., 10., 5., 5.)\n        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)\n\n        self.box_roi_pool = box_roi_pool    # Multi-scale RoIAlign pooling\n        self.box_head = box_head            # TwoMLPHead\n        self.box_predictor = box_predictor  # FastRCNNPredictor\n\n        self.score_thresh = score_thresh  # default: 0.05\n        self.nms_thresh = nms_thresh      # default: 0.5\n        self.detection_per_img = detection_per_img  # default: 100\n\n    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):\n        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        为每个proposal匹配对应的gt_box，并划分到正负样本中\n        Args:\n            proposals:\n            gt_boxes:\n            gt_labels:\n\n   
     Returns:\n\n        \"\"\"\n        matched_idxs = []\n        labels = []\n        # 遍历每张图像的proposals, gt_boxes, gt_labels信息\n        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):\n            if gt_boxes_in_image.numel() == 0:  # 该张图像中没有gt框，为背景\n                # background image\n                device = proposals_in_image.device\n                clamped_matched_idxs_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n                labels_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n            else:\n                #  set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                # 计算proposal与每个gt_box的iou重合度\n                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)\n\n                # 计算proposal与每个gt_box匹配的iou最大值，并记录索引，\n                # iou < low_threshold索引值为 -1， low_threshold <= iou < high_threshold索引值为 -2\n                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)\n\n                # 限制最小值，防止匹配标签时出现越界的情况\n                # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别（实际上并不是）,后续会进一步处理\n                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)\n                # 获取proposal匹配到的gt对应标签\n                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]\n                labels_in_image = labels_in_image.to(dtype=torch.int64)\n\n                # label background (below the low threshold)\n                # 将gt索引为-1的类别设置为0，即背景，负样本\n                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_in_image[bg_inds] = 0\n\n                # label ignore proposals (between low and high threshold)\n                # 将gt索引为-2的类别设置为-1, 即废弃样本\n            
    ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler\n\n            matched_idxs.append(clamped_matched_idxs_in_image)\n            labels.append(labels_in_image)\n        return matched_idxs, labels\n\n    def subsample(self, labels):\n        # type: (List[Tensor]) -> List[Tensor]\n        # BalancedPositiveNegativeSampler\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        sampled_inds = []\n        # 遍历每张图片的正负样本索引\n        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):\n            # 记录所有采集样本索引（包括正样本和负样本）\n            # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)\n            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]\n            sampled_inds.append(img_sampled_inds)\n        return sampled_inds\n\n    def add_gt_proposals(self, proposals, gt_boxes):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        将gt_boxes拼接到proposal后面\n        Args:\n            proposals: 一个batch中每张图像rpn预测的boxes\n            gt_boxes:  一个batch中每张图像对应的真实目标边界框\n\n        Returns:\n\n        \"\"\"\n        proposals = [\n            torch.cat((proposal, gt_box))\n            for proposal, gt_box in zip(proposals, gt_boxes)\n        ]\n        return proposals\n\n    def check_targets(self, targets):\n        # type: (Optional[List[Dict[str, Tensor]]]) -> None\n        assert targets is not None\n        assert all([\"boxes\" in t for t in targets])\n        assert all([\"labels\" in t for t in targets])\n\n    def select_training_samples(self,\n                                proposals,  # type: List[Tensor]\n                                targets     # type: Optional[List[Dict[str, Tensor]]]\n                                ):\n        # type: (...) 
-> Tuple[List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        划分正负样本，统计对应gt的标签以及边界框回归信息\n        list元素个数为batch_size\n        Args:\n            proposals: rpn预测的boxes\n            targets:\n\n        Returns:\n\n        \"\"\"\n\n        # 检查target数据是否为空\n        self.check_targets(targets)\n        # 如果不加这句，jit.script会不通过(看不懂)\n        assert targets is not None\n\n        dtype = proposals[0].dtype\n        device = proposals[0].device\n\n        # 获取标注好的boxes以及labels信息\n        gt_boxes = [t[\"boxes\"].to(dtype) for t in targets]\n        gt_labels = [t[\"labels\"] for t in targets]\n\n        # append ground-truth bboxes to proposal\n        # 将gt_boxes拼接到proposal后面\n        proposals = self.add_gt_proposals(proposals, gt_boxes)\n\n        # get matching gt indices for each proposal\n        # 为每个proposal匹配对应的gt_box，并划分到正负样本中\n        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)\n        # sample a fixed proportion of positive-negative proposals\n        # 按给定数量和比例采样正负样本\n        sampled_inds = self.subsample(labels)\n        matched_gt_boxes = []\n        num_images = len(proposals)\n\n        # 遍历每张图像\n        for img_id in range(num_images):\n            # 获取每张图像的正负样本索引\n            img_sampled_inds = sampled_inds[img_id]\n            # 获取对应正负样本的proposals信息\n            proposals[img_id] = proposals[img_id][img_sampled_inds]\n            # 获取对应正负样本的真实类别信息\n            labels[img_id] = labels[img_id][img_sampled_inds]\n            # 获取对应正负样本的gt索引信息\n            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]\n\n            gt_boxes_in_image = gt_boxes[img_id]\n            if gt_boxes_in_image.numel() == 0:\n                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)\n            # 获取对应正负样本的gt box信息\n            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])\n\n        # 根据gt和proposal计算边框回归参数（针对gt的）\n        regression_targets = 
self.box_coder.encode(matched_gt_boxes, proposals)\n        return proposals, labels, regression_targets\n\n    def postprocess_detections(self,\n                               class_logits,    # type: Tensor\n                               box_regression,  # type: Tensor\n                               proposals,       # type: List[Tensor]\n                               image_shapes     # type: List[Tuple[int, int]]\n                               ):\n        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        对网络的预测数据进行后处理，包括\n        （1）根据proposal以及预测的回归参数计算出最终bbox坐标\n        （2）对预测类别结果进行softmax处理\n        （3）裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n        （4）移除所有背景信息\n        （5）移除低概率目标\n        （6）移除小尺寸目标\n        （7）执行nms处理，并按scores进行排序\n        （8）根据scores排序返回前topk个目标\n        Args:\n            class_logits: 网络预测类别概率信息\n            box_regression: 网络预测的边界框回归参数\n            proposals: rpn输出的proposal\n            image_shapes: 打包成batch前每张图像的宽高\n\n        Returns:\n\n        \"\"\"\n        device = class_logits.device\n        # 预测目标类别数\n        num_classes = class_logits.shape[-1]\n\n        # 获取每张图像的预测bbox数量\n        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]\n        # 根据proposal以及预测的回归参数计算出最终bbox坐标\n        pred_boxes = self.box_coder.decode(box_regression, proposals)\n\n        # 对预测类别结果进行softmax处理\n        pred_scores = F.softmax(class_logits, -1)\n\n        # split boxes and scores per image\n        # 根据每张图像的预测bbox数量分割结果\n        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)\n        pred_scores_list = pred_scores.split(boxes_per_image, 0)\n\n        all_boxes = []\n        all_scores = []\n        all_labels = []\n        # 遍历每张图像预测信息\n        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):\n            # 裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)\n\n            # create labels for each 
prediction\n            labels = torch.arange(num_classes, device=device)\n            labels = labels.view(1, -1).expand_as(scores)\n\n            # remove prediction with the background label\n            # 移除索引为0的所有信息（0代表背景）\n            boxes = boxes[:, 1:]\n            scores = scores[:, 1:]\n            labels = labels[:, 1:]\n\n            # batch everything, by making every class prediction be a separate instance\n            boxes = boxes.reshape(-1, 4)\n            scores = scores.reshape(-1)\n            labels = labels.reshape(-1)\n\n            # remove low scoring boxes\n            # 移除低概率目标，self.score_thresh=0.05\n            # gt: Computes input > other element-wise.\n            # inds = torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1)\n            inds = torch.where(torch.gt(scores, self.score_thresh))[0]\n            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]\n\n            # remove empty boxes\n            # 移除小目标\n            keep = box_ops.remove_small_boxes(boxes, min_size=1.)\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n            # non-maximum suppression, independently done per class\n            # 执行nms处理，执行后的结果会按照scores从大到小进行排序返回\n            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            # 获取scores排在前topk个预测目标\n            keep = keep[:self.detection_per_img]\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n            all_boxes.append(boxes)\n            all_scores.append(scores)\n            all_labels.append(labels)\n\n        return all_boxes, all_scores, all_labels\n\n    def forward(self,\n                features,       # type: Dict[str, Tensor]\n                proposals,      # type: List[Tensor]\n                image_shapes,   # type: List[Tuple[int, int]]\n                targets=None    # type: Optional[List[Dict[str, Tensor]]]\n           
     ):\n        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            features (List[Tensor])\n            proposals (List[Tensor[N, 4]])\n            image_shapes (List[Tuple[H, W]])\n            targets (List[Dict])\n        \"\"\"\n\n        # 检查targets的数据类型是否正确\n        if targets is not None:\n            for t in targets:\n                floating_point_types = (torch.float, torch.double, torch.half)\n                assert t[\"boxes\"].dtype in floating_point_types, \"target boxes must of float type\"\n                assert t[\"labels\"].dtype == torch.int64, \"target labels must of int64 type\"\n\n        if self.training:\n            # 划分正负样本，统计对应gt的标签以及边界框回归信息\n            proposals, labels, regression_targets = self.select_training_samples(proposals, targets)\n        else:\n            labels = None\n            regression_targets = None\n\n        # 将采集样本通过Multi-scale RoIAlign pooling层\n        # box_features_shape: [num_proposals, channel, height, width]\n        box_features = self.box_roi_pool(features, proposals, image_shapes)\n\n        # 通过roi_pooling后的两层全连接层\n        # box_features_shape: [num_proposals, representation_size]\n        box_features = self.box_head(box_features)\n\n        # 接着分别预测目标类别和边界框回归参数\n        class_logits, box_regression = self.box_predictor(box_features)\n\n        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])\n        losses = {}\n        if self.training:\n            assert labels is not None and regression_targets is not None\n            loss_classifier, loss_box_reg = fastrcnn_loss(\n                class_logits, box_regression, labels, regression_targets)\n            losses = {\n                \"loss_classifier\": loss_classifier,\n                \"loss_box_reg\": loss_box_reg\n            }\n        else:\n            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)\n      
      num_images = len(boxes)\n            for i in range(num_images):\n                result.append(\n                    {\n                        \"boxes\": boxes[i],\n                        \"labels\": labels[i],\n                        \"scores\": scores[i],\n                    }\n                )\n\n        return result, losses\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/rpn_function.py",
    "content": "from typing import List, Optional, Dict, Tuple\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nimport torchvision\n\nfrom . import det_utils\nfrom . import boxes as box_ops\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):\n    # type: (Tensor, int) -> Tuple[int, int]\n    from torch.onnx import operators\n    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)\n    pre_nms_top_n = torch.min(torch.cat(\n        (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),\n         num_anchors), 0))\n\n    return num_anchors, pre_nms_top_n\n\n\nclass AnchorsGenerator(nn.Module):\n    __annotations__ = {\n        \"cell_anchors\": Optional[List[torch.Tensor]],\n        \"_cache\": Dict[str, List[torch.Tensor]]\n    }\n\n    \"\"\"\n    anchors生成器\n    Module that generates anchors for a set of feature maps and\n    image sizes.\n\n    The module support computing anchors at multiple sizes and aspect ratios\n    per feature map.\n\n    sizes and aspect_ratios should have the same number of elements, and it should\n    correspond to the number of feature maps.\n\n    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,\n    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors\n    per spatial location for feature map i.\n\n    Arguments:\n        sizes (Tuple[Tuple[int]]):\n        aspect_ratios (Tuple[Tuple[float]]):\n    \"\"\"\n\n    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):\n        super(AnchorsGenerator, self).__init__()\n\n        if not isinstance(sizes[0], (list, tuple)):\n            # TODO change this\n            sizes = tuple((s,) for s in sizes)\n        if not isinstance(aspect_ratios[0], (list, tuple)):\n            aspect_ratios = (aspect_ratios,) * len(sizes)\n\n        assert len(sizes) == len(aspect_ratios)\n\n        self.sizes = sizes\n 
       self.aspect_ratios = aspect_ratios\n        self.cell_anchors = None\n        self._cache = {}\n\n    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device(\"cpu\")):\n        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor\n        \"\"\"\n        compute anchor sizes\n        Arguments:\n            scales: sqrt(anchor_area)\n            aspect_ratios: h/w ratios\n            dtype: float32\n            device: cpu/gpu\n        \"\"\"\n        scales = torch.as_tensor(scales, dtype=dtype, device=device)\n        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)\n        h_ratios = torch.sqrt(aspect_ratios)\n        w_ratios = 1.0 / h_ratios\n\n        # [r1, r2, r3]' * [s1, s2, s3]\n        # number of elements is len(ratios)*len(scales)\n        ws = (w_ratios[:, None] * scales[None, :]).view(-1)\n        hs = (h_ratios[:, None] * scales[None, :]).view(-1)\n\n        # left-top, right-bottom coordinate relative to anchor center(0, 0)\n        # 生成的anchors模板都是以（0, 0）为中心的, shape [len(ratios)*len(scales), 4]\n        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2\n\n        return base_anchors.round()  # round 四舍五入\n\n    def set_cell_anchors(self, dtype, device):\n        # type: (torch.dtype, torch.device) -> None\n        if self.cell_anchors is not None:\n            cell_anchors = self.cell_anchors\n            assert cell_anchors is not None\n            # suppose that all anchors have the same device\n            # which is a valid assumption in the current state of the codebase\n            if cell_anchors[0].device == device:\n                return\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        # anchors模板都是以(0, 0)为中心的anchor\n        cell_anchors = [\n            self.generate_anchors(sizes, aspect_ratios, dtype, device)\n            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)\n        ]\n        self.cell_anchors = 
cell_anchors\n\n    def num_anchors_per_location(self):\n        # 计算每个预测特征层上每个滑动窗口的预测目标数\n        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]\n\n    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),\n    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.\n    def grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"\n        anchors position in grid coordinate axis map into origin image\n        计算预测特征图对应原始图像上的所有anchors的坐标\n        Args:\n            grid_sizes: 预测特征矩阵的height和width\n            strides: 预测特征矩阵上一步对应原始图像上的步距\n        \"\"\"\n        anchors = []\n        cell_anchors = self.cell_anchors\n        assert cell_anchors is not None\n\n        # 遍历每个预测特征层的grid_size，strides和cell_anchors\n        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):\n            grid_height, grid_width = size\n            stride_height, stride_width = stride\n            device = base_anchors.device\n\n            # For output anchor, compute [x_center, y_center, x_center, y_center]\n            # shape: [grid_width] 对应原图上的x坐标(列)\n            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width\n            # shape: [grid_height] 对应原图上的y坐标(行)\n            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height\n\n            # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量)\n            # torch.meshgrid函数分别传入行坐标和列坐标，生成网格行坐标矩阵和网格列坐标矩阵\n            # shape: [grid_height, grid_width]\n            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n            shift_x = shift_x.reshape(-1)\n            shift_y = shift_y.reshape(-1)\n\n            # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量\n            # shape: [grid_width*grid_height, 4]\n            shifts = torch.stack([shift_x, shift_y, 
shift_x, shift_y], dim=1)\n\n            # For every (base anchor, output anchor) pair,\n            # offset each zero-centered base anchor by the center of the output anchor.\n            # 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制)\n            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)\n            anchors.append(shifts_anchor.reshape(-1, 4))\n\n        return anchors  # List[Tensor(all_num_anchors, 4)]\n\n    def cached_grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"将计算得到的所有anchors信息进行缓存\"\"\"\n        key = str(grid_sizes) + str(strides)\n        # self._cache是字典类型\n        if key in self._cache:\n            return self._cache[key]\n        anchors = self.grid_anchors(grid_sizes, strides)\n        self._cache[key] = anchors\n        return anchors\n\n    def forward(self, image_list, feature_maps):\n        # type: (ImageList, List[Tensor]) -> List[Tensor]\n        # 获取每个预测特征层的尺寸(height, width)\n        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])\n\n        # 获取输入图像的height和width\n        image_size = image_list.tensors.shape[-2:]\n\n        # 获取变量类型和设备类型\n        dtype, device = feature_maps[0].dtype, feature_maps[0].device\n\n        # one step in feature map equate n pixel stride in origin image\n        # 计算特征层上的一步等于原始图像上的步长\n        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),\n                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        self.set_cell_anchors(dtype, device)\n\n        # 计算/读取所有anchors的坐标信息（这里的anchors信息是映射到原图上的所有anchors信息，不是anchors模板）\n        # 得到的是一个list列表，对应每张预测特征图映射回原图的anchors坐标信息\n        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)\n\n        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])\n     
   # 遍历一个batch中的每张图像\n        for i, (image_height, image_width) in enumerate(image_list.image_sizes):\n            anchors_in_image = []\n            # 遍历每张预测特征图映射回原图的anchors坐标信息\n            for anchors_per_feature_map in anchors_over_all_feature_maps:\n                anchors_in_image.append(anchors_per_feature_map)\n            anchors.append(anchors_in_image)\n        # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起\n        # anchors是个list，每个元素为一张图像的所有anchors信息\n        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]\n        # Clear the cache in case that memory leaks.\n        self._cache.clear()\n        return anchors\n\n\nclass RPNHead(nn.Module):\n    \"\"\"\n    add a RPN head with classification and regression\n    通过滑动窗口计算预测目标概率与bbox regression参数\n\n    Arguments:\n        in_channels: number of channels of the input feature\n        num_anchors: number of anchors to be predicted\n    \"\"\"\n\n    def __init__(self, in_channels, num_anchors):\n        super(RPNHead, self).__init__()\n        # 3x3 滑动窗口\n        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)\n        # 计算预测的目标分数（这里的目标只是指前景或者背景）\n        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)\n        # 计算预测的目标bbox regression参数\n        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)\n\n        for layer in self.children():\n            if isinstance(layer, nn.Conv2d):\n                torch.nn.init.normal_(layer.weight, std=0.01)\n                torch.nn.init.constant_(layer.bias, 0)\n\n    def forward(self, x):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        logits = []\n        bbox_reg = []\n        for i, feature in enumerate(x):\n            t = F.relu(self.conv(feature))\n            logits.append(self.cls_logits(t))\n            bbox_reg.append(self.bbox_pred(t))\n        return logits, bbox_reg\n\n\ndef permute_and_flatten(layer, N, A, C, H, 
W):\n    # type: (Tensor, int, int, int, int, int) -> Tensor\n    \"\"\"\n    调整tensor顺序，并进行reshape\n    Args:\n        layer: 预测特征层上预测的目标概率或bboxes regression参数\n        N: batch_size\n        A: anchors_num_per_position\n        C: classes_num or 4(bbox coordinate)\n        H: height\n        W: width\n\n    Returns:\n        layer: 调整tensor顺序，并reshape后的结果[N, -1, C]\n    \"\"\"\n    # view和reshape功能是一样的，先展平所有元素再按照给定shape排列\n    # view函数只能用于内存中连续存储的tensor，permute等操作会使tensor在内存中变得不再连续，此时就不能再调用view函数\n    # reshape则不需要依赖目标tensor是否在内存中是连续的\n    # [batch_size, anchors_num_per_position * (C or 4), height, width]\n    layer = layer.view(N, -1, C,  H, W)\n    # 调换tensor维度\n    layer = layer.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]\n    layer = layer.reshape(N, -1, C)\n    return layer\n\n\ndef concat_box_prediction_layers(box_cls, box_regression):\n    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    对box_cls和box_regression两个list中的每个预测特征层的预测信息\n    的tensor排列顺序以及shape进行调整 -> [N, -1, C]\n    Args:\n        box_cls: 每个预测特征层上的预测目标概率\n        box_regression: 每个预测特征层上的预测目标bboxes regression参数\n\n    Returns:\n\n    \"\"\"\n    box_cls_flattened = []\n    box_regression_flattened = []\n\n    # 遍历每个预测特征层\n    for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):\n        # [batch_size, anchors_num_per_position * classes_num, height, width]\n        # 注意，当计算RPN中的proposal时，classes_num=1,只区分目标和背景\n        N, AxC, H, W = box_cls_per_level.shape\n        # [batch_size, anchors_num_per_position * 4, height, width]\n        Ax4 = box_regression_per_level.shape[1]\n        # anchors_num_per_position\n        A = Ax4 // 4\n        # classes_num\n        C = AxC // A\n\n        # [N, -1, C]\n        box_cls_per_level = permute_and_flatten(box_cls_per_level, N, A, C, H, W)\n        box_cls_flattened.append(box_cls_per_level)\n\n        # [N, -1, C]\n        box_regression_per_level = permute_and_flatten(box_regression_per_level, N, 
A, 4, H, W)\n        box_regression_flattened.append(box_regression_per_level)\n\n    box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2)  # start_dim, end_dim\n    box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4)\n    return box_cls, box_regression\n\n\nclass RegionProposalNetwork(torch.nn.Module):\n    \"\"\"\n    Implements Region Proposal Network (RPN).\n\n    Arguments:\n        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        head (nn.Module): module that computes the objectness and regression deltas\n        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. 
It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n\n    \"\"\"\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n        'pre_nms_top_n': Dict[str, int],\n        'post_nms_top_n': Dict[str, int],\n    }\n\n    def __init__(self, anchor_generator, head,\n                 fg_iou_thresh, bg_iou_thresh,\n                 batch_size_per_image, positive_fraction,\n                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):\n        super(RegionProposalNetwork, self).__init__()\n        self.anchor_generator = anchor_generator\n        self.head = head\n        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))\n\n        # use during training\n        # 计算anchors与真实bbox的iou\n        self.box_similarity = box_ops.box_iou\n\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # 当iou大于fg_iou_thresh(0.7)时视为正样本\n            bg_iou_thresh,  # 当iou小于bg_iou_thresh(0.3)时视为负样本\n            allow_low_quality_matches=True\n        )\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image, positive_fraction  # 256, 0.5\n        )\n\n        # use during testing\n        self._pre_nms_top_n = pre_nms_top_n\n        self._post_nms_top_n = post_nms_top_n\n        self.nms_thresh = nms_thresh\n        self.score_thresh = score_thresh\n        self.min_size = 1.\n\n    def pre_nms_top_n(self):\n        if self.training:\n            return self._pre_nms_top_n['training']\n        return self._pre_nms_top_n['testing']\n\n    def post_nms_top_n(self):\n        if self.training:\n            return self._post_nms_top_n['training']\n        return 
self._post_nms_top_n['testing']\n\n    def assign_targets_to_anchors(self, anchors, targets):\n        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        计算每个anchors最匹配的gt，并划分为正样本，背景以及废弃的样本\n        Args：\n            anchors: (List[Tensor])\n            targets: (List[Dict[Tensor])\n        Returns:\n            labels: 标记anchors归属类别（1, 0, -1分别对应正样本，背景，废弃的样本）\n                    注意，在RPN中只有前景和背景，所有正样本的类别都是1，0代表背景\n            matched_gt_boxes：与anchors匹配的gt\n        \"\"\"\n        labels = []\n        matched_gt_boxes = []\n        # 遍历每张图像的anchors和targets\n        for anchors_per_image, targets_per_image in zip(anchors, targets):\n            gt_boxes = targets_per_image[\"boxes\"]\n            if gt_boxes.numel() == 0:\n                device = anchors_per_image.device\n                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)\n                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)\n            else:\n                # 计算anchors与真实bbox的iou信息\n                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)\n                # 计算每个anchors与gt匹配iou最大的索引（如果iou<0.3索引置为-1，0.3<iou<0.7索引为-2）\n                matched_idxs = self.proposal_matcher(match_quality_matrix)\n                # get the targets corresponding GT for each proposal\n                # NB: need to clamp the indices because we can have a single\n                # GT in the image, and matched_idxs can be -2, which goes\n                # out of bounds\n                # 这里使用clamp设置下限0是为了方便取每个anchors对应的gt_boxes信息\n                # 负样本和舍弃的样本都是负值，所以为了防止越界直接置为0\n                # 因为后面是通过labels_per_image变量来记录正样本位置的，\n                # 所以负样本和舍弃的样本对应的gt_boxes信息并没有什么意义，\n                # 反正计算目标边界框回归损失时只会用到正样本。\n    
            matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]\n\n                # 记录所有anchors匹配后的标签(正样本处标记为1，负样本处标记为0，丢弃样本处标记为-1)\n                labels_per_image = matched_idxs >= 0\n                labels_per_image = labels_per_image.to(dtype=torch.float32)\n\n                # background (negative examples)\n                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_per_image[bg_indices] = 0.0\n\n                # discard indices that are between thresholds\n                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_per_image[inds_to_discard] = -1.0\n\n            labels.append(labels_per_image)\n            matched_gt_boxes.append(matched_gt_boxes_per_image)\n        return labels, matched_gt_boxes\n\n    def _get_top_n_idx(self, objectness, num_anchors_per_level):\n        # type: (Tensor, List[int]) -> Tensor\n        \"\"\"\n        获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        Args:\n            objectness: Tensor(每张图像的预测目标概率信息 )\n            num_anchors_per_level: List（每个预测特征层上的预测的anchors个数）\n        Returns:\n\n        \"\"\"\n        r = []  # 记录每个预测特征层上预测目标概率前pre_nms_top_n的索引信息\n        offset = 0\n        # 遍历每个预测特征层上的预测目标概率信息\n        for ob in objectness.split(num_anchors_per_level, 1):\n            if torchvision._is_tracing():\n                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())\n            else:\n                num_anchors = ob.shape[1]  # 预测特征层上的预测的anchors个数\n                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)\n\n            # Returns the k largest elements of the given input tensor along a given dimension\n            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)\n            r.append(top_n_idx + offset)\n            offset += num_anchors\n        return torch.cat(r, dim=1)\n\n    def filter_proposals(self, proposals, objectness, 
image_shapes, num_anchors_per_level):\n        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        Args:\n            proposals: 预测的bbox坐标\n            objectness: 预测的目标概率\n            image_shapes: batch中每张图片的size信息\n            num_anchors_per_level: 每个预测特征层上预测anchors的数目\n\n        Returns:\n\n        \"\"\"\n        num_images = proposals.shape[0]\n        device = proposals.device\n\n        # do not backprop through objectness\n        objectness = objectness.detach()\n        objectness = objectness.reshape(num_images, -1)\n\n        # Returns a tensor of the given size filled with fill_value\n        # levels负责记录分隔不同预测特征层上的anchors索引信息\n        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)\n                  for idx, n in enumerate(num_anchors_per_level)]\n        levels = torch.cat(levels, 0)\n\n        # Expand this tensor to the same size as objectness\n        levels = levels.reshape(1, -1).expand_as(objectness)\n\n        # select top_n boxes independently per level before applying nms\n        # 获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)\n\n        image_range = torch.arange(num_images, device=device)\n        batch_idx = image_range[:, None]  # [batch_size, 1]\n\n        # 根据每个预测特征层预测概率排前pre_nms_top_n的anchors索引值获取相应概率信息\n        objectness = objectness[batch_idx, top_n_idx]\n        levels = levels[batch_idx, top_n_idx]\n        # 预测概率排前pre_nms_top_n的anchors索引值获取相应bbox坐标信息\n        proposals = proposals[batch_idx, top_n_idx]\n\n        objectness_prob = torch.sigmoid(objectness)\n\n        final_boxes = []\n        final_scores = []\n        # 遍历每张图像的相关预测信息\n        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):\n            # 调整预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = 
box_ops.clip_boxes_to_image(boxes, img_shape)\n\n            # 返回boxes满足宽，高都大于min_size的索引\n            keep = box_ops.remove_small_boxes(boxes, self.min_size)\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # 移除小概率boxes，参考下面这个链接\n            # https://github.com/pytorch/vision/pull/3205\n            keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # non-maximum suppression, independently done per level\n            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            keep = keep[: self.post_nms_top_n()]\n            boxes, scores = boxes[keep], scores[keep]\n\n            final_boxes.append(boxes)\n            final_scores.append(scores)\n        return final_boxes, final_scores\n\n    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):\n        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n        \"\"\"\n        计算RPN损失，包括类别损失（前景与背景），bbox regression损失\n        Arguments:\n            objectness (Tensor)：预测的前景概率\n            pred_bbox_deltas (Tensor)：预测的bbox regression\n            labels (List[Tensor])：真实的标签 1, 0, -1（batch中每一张图片的labels对应List的一个元素中）\n            regression_targets (List[Tensor])：真实的bbox regression\n\n        Returns:\n            objectness_loss (Tensor) : 类别损失\n            box_loss (Tensor)：边界框回归损失\n        \"\"\"\n        # 按照给定的batch_size_per_image, positive_fraction选择正负样本\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        # 将一个batch中的所有正负样本List(Tensor)分别拼接在一起，并获取非零位置的索引\n        # sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)\n        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]\n        # sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)\n        
sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]\n\n        # 将所有正负样本索引拼接在一起\n        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)\n        objectness = objectness.flatten()\n\n        labels = torch.cat(labels, dim=0)\n        regression_targets = torch.cat(regression_targets, dim=0)\n\n        # 计算边界框回归损失\n        box_loss = det_utils.smooth_l1_loss(\n            pred_bbox_deltas[sampled_pos_inds],\n            regression_targets[sampled_pos_inds],\n            beta=1 / 9,\n            size_average=False,\n        ) / (sampled_inds.numel())\n\n        # 计算目标预测概率损失\n        objectness_loss = F.binary_cross_entropy_with_logits(\n            objectness[sampled_inds], labels[sampled_inds]\n        )\n\n        return objectness_loss, box_loss\n\n    def forward(self,\n                images,        # type: ImageList\n                features,      # type: Dict[str, Tensor]\n                targets=None   # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            images (ImageList): images for which we want to compute the predictions\n            features (Dict[Tensor]): features computed from the images that are\n                used for computing the predictions. Each tensor in the list\n                correspond to different feature levels\n            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).\n                If provided, each element in the dict should contain a field `boxes`,\n                with the locations of the ground-truth boxes.\n\n        Returns:\n            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per\n                image.\n            losses (Dict[Tensor]): the losses for the model during training. 
During\n                testing, it is an empty dict.\n        \"\"\"\n        # RPN uses all feature maps that are available\n        # features是所有预测特征层组成的OrderedDict\n        features = list(features.values())\n\n        # 计算每个预测特征层上的预测目标概率和bboxes regression参数\n        # objectness和pred_bbox_deltas都是list\n        objectness, pred_bbox_deltas = self.head(features)\n\n        # 生成一个batch图像的所有anchors信息,list(tensor)元素个数等于batch_size\n        anchors = self.anchor_generator(images, features)\n\n        # batch_size\n        num_images = len(anchors)\n\n        # numel() Returns the total number of elements in the input tensor.\n        # 计算每个预测特征层上的对应的anchors数量\n        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]\n        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]\n\n        # 调整内部tensor格式以及shape\n        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,\n                                                                    pred_bbox_deltas)\n\n        # apply pred_bbox_deltas to anchors to obtain the decoded proposals\n        # note that we detach the deltas because Faster R-CNN do not backprop through\n        # the proposals\n        # 将预测的bbox regression参数应用到anchors上得到最终预测bbox坐标\n        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)\n        proposals = proposals.view(num_images, -1, 4)\n\n        # 筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)\n\n        losses = {}\n        if self.training:\n            assert targets is not None\n            # 计算每个anchors最匹配的gt，并将anchors进行分类，前景，背景以及废弃的anchors\n            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)\n            # 结合anchors以及对应的gt，计算regression参数\n            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)\n            loss_objectness, 
loss_rpn_box_reg = self.compute_loss(\n                objectness, pred_bbox_deltas, labels, regression_targets\n            )\n            losses = {\n                \"loss_objectness\": loss_objectness,\n                \"loss_rpn_box_reg\": loss_rpn_box_reg\n            }\n        return boxes, losses\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/network_files/transform.py",
    "content": "import math\nfrom typing import List, Tuple, Dict, Optional\n\nimport torch\nfrom torch import nn, Tensor\nimport torchvision\n\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _resize_image_onnx(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    from torch.onnx import operators\n    im_shape = operators.shape_as_tensor(image)[-2:]\n    min_size = torch.min(im_shape).to(dtype=torch.float32)\n    max_size = torch.max(im_shape).to(dtype=torch.float32)\n    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)\n\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\ndef _resize_image(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    im_shape = torch.tensor(image.shape[-2:])\n    min_size = float(torch.min(im_shape))    # 获取高宽中的最小值\n    max_size = float(torch.max(im_shape))    # 获取高宽中的最大值\n    scale_factor = self_min_size / min_size  # 根据指定最小边长和图片最小边长计算缩放比例\n\n    # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长\n    if max_size * scale_factor > self_max_size:\n        scale_factor = self_max_size / max_size  # 将缩放比例设为指定最大边长和图片最大边长之比\n\n    # interpolate利用插值的方法缩放图片\n    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]\n    # bilinear只支持4D Tensor\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\nclass GeneralizedRCNNTransform(nn.Module):\n    \"\"\"\n    Performs input / target transformation before feeding the data to a GeneralizedRCNN\n    model.\n\n    The transformations it perform are:\n        - input normalization (mean subtraction and std division)\n        - input / target resizing to match min_size / max_size\n\n    It returns a ImageList for the inputs, 
and a List[Dict[Tensor]] for the targets\n    \"\"\"\n\n    def __init__(self, min_size, max_size, image_mean, image_std):\n        super(GeneralizedRCNNTransform, self).__init__()\n        if not isinstance(min_size, (list, tuple)):\n            min_size = (min_size,)\n        self.min_size = min_size      # 指定图像的最小边长范围\n        self.max_size = max_size      # 指定图像的最大边长范围\n        self.image_mean = image_mean  # 指定图像在标准化处理中的均值\n        self.image_std = image_std    # 指定图像在标准化处理中的方差\n\n    def normalize(self, image):\n        \"\"\"标准化处理\"\"\"\n        dtype, device = image.dtype, image.device\n        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)\n        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)\n        # [:, None, None]: shape [3] -> [3, 1, 1]\n        return (image - mean[:, None, None]) / std[:, None, None]\n\n    def torch_choice(self, k):\n        # type: (List[int]) -> int\n        \"\"\"\n        Implements `random.choice` via torch ops so it can be compiled with\n        TorchScript. 
Remove if https://github.com/pytorch/pytorch/issues/25803\n        is fixed.\n        \"\"\"\n        index = int(torch.empty(1).uniform_(0., float(len(k))).item())\n        return k[index]\n\n    def resize(self, image, target):\n        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]\n        \"\"\"\n        将图片缩放到指定的大小范围内，并对应缩放bboxes信息\n        Args:\n            image: 输入的图片\n            target: 输入图片的相关信息（包括bboxes信息）\n\n        Returns:\n            image: 缩放后的图片\n            target: 缩放bboxes后的图片相关信息\n        \"\"\"\n        # image shape is [channel, height, width]\n        h, w = image.shape[-2:]\n\n        if self.training:\n            size = float(self.torch_choice(self.min_size))  # 指定输入图片的最小边长,注意是self.min_size不是min_size\n        else:\n            # FIXME assume for now that testing uses the largest scale\n            size = float(self.min_size[-1])    # 指定输入图片的最小边长,注意是self.min_size不是min_size\n\n        if torchvision._is_tracing():\n            image = _resize_image_onnx(image, size, float(self.max_size))\n        else:\n            image = _resize_image(image, size, float(self.max_size))\n\n        if target is None:\n            return image, target\n\n        bbox = target[\"boxes\"]\n        # 根据图像的缩放比例来缩放bbox\n        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])\n        target[\"boxes\"] = bbox\n\n        return image, target\n\n    # _onnx_batch_images() is an implementation of\n    # batch_images() that is supported by ONNX tracing.\n    @torch.jit.unused\n    def _onnx_batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        max_size = []\n        for i in range(images[0].dim()):\n            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)\n            max_size.append(max_size_i)\n        stride = size_divisible\n        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * 
stride).to(torch.int64)\n        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)\n        max_size = tuple(max_size)\n\n        # work around for\n        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n        # which is not yet supported in onnx\n        padded_imgs = []\n        for img in images:\n            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]\n            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])\n            padded_imgs.append(padded_img)\n\n        return torch.stack(padded_imgs)\n\n    def max_by_axis(self, the_list):\n        # type: (List[List[int]]) -> List[int]\n        maxes = the_list[0]\n        for sublist in the_list[1:]:\n            for index, item in enumerate(sublist):\n                maxes[index] = max(maxes[index], item)\n        return maxes\n\n    def batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        \"\"\"\n        将一批图像打包成一个batch返回（注意batch中每个tensor的shape是相同的）\n        Args:\n            images: 输入的一批图片\n            size_divisible: 将图像高和宽调整到该数的整数倍\n\n        Returns:\n            batched_imgs: 打包成一个batch后的tensor数据\n        \"\"\"\n\n        if torchvision._is_tracing():\n            # batch_images() does not export well to ONNX\n            # call _onnx_batch_images() instead\n            return self._onnx_batch_images(images, size_divisible)\n\n        # 分别计算一个batch中所有图片中的最大channel, height, width\n        max_size = self.max_by_axis([list(img.shape) for img in images])\n\n        stride = float(size_divisible)\n        # max_size = list(max_size)\n        # 将height向上调整到stride的整数倍\n        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)\n        # 将width向上调整到stride的整数倍\n        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)\n\n        # [batch, channel, height, width]\n        batch_shape = 
[len(images)] + max_size\n\n        # 创建shape为batch_shape且值全部为0的tensor\n        batched_imgs = images[0].new_full(batch_shape, 0)\n        for img, pad_img in zip(images, batched_imgs):\n            # 将输入images中的每张图片复制到新的batched_imgs的每张图片中，对齐左上角，保证bboxes的坐标不变\n            # 这样保证输入到网络中一个batch的每张图片的shape相同\n            # copy_: Copies the elements from src into self tensor and returns self\n            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n\n        return batched_imgs\n\n    def postprocess(self,\n                    result,                # type: List[Dict[str, Tensor]]\n                    image_shapes,          # type: List[Tuple[int, int]]\n                    original_image_sizes   # type: List[Tuple[int, int]]\n                    ):\n        # type: (...) -> List[Dict[str, Tensor]]\n        \"\"\"\n        对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        Args:\n            result: list(dict), 网络的预测结果, len(result) == batch_size\n            image_shapes: list(torch.Size), 图像预处理缩放后的尺寸, len(image_shapes) == batch_size\n            original_image_sizes: list(torch.Size), 图像的原始尺寸, len(original_image_sizes) == batch_size\n\n        Returns:\n\n        \"\"\"\n        if self.training:\n            return result\n\n        # 遍历每张图片的预测信息，将boxes信息还原回原尺度\n        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):\n            boxes = pred[\"boxes\"]\n            boxes = resize_boxes(boxes, im_s, o_im_s)  # 将bboxes缩放回原图像尺度上\n            result[i][\"boxes\"] = boxes\n        return result\n\n    def __repr__(self):\n        \"\"\"自定义输出实例化对象的信息，可通过print打印实例信息\"\"\"\n        format_string = self.__class__.__name__ + '('\n        _indent = '\\n    '\n        format_string += \"{0}Normalize(mean={1}, std={2})\".format(_indent, self.image_mean, self.image_std)\n        format_string += \"{0}Resize(min_size={1}, max_size={2}, mode='bilinear')\".format(_indent, self.min_size,\n                                       
                                                  self.max_size)\n        format_string += '\\n)'\n        return format_string\n\n    def forward(self,\n                images,       # type: List[Tensor]\n                targets=None  # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]\n        images = [img for img in images]\n        for i in range(len(images)):\n            image = images[i]\n            target_index = targets[i] if targets is not None else None\n\n            if image.dim() != 3:\n                raise ValueError(\"images is expected to be a list of 3d tensors \"\n                                 \"of shape [C, H, W], got {}\".format(image.shape))\n            image = self.normalize(image)                # 对图像进行标准化处理\n            image, target_index = self.resize(image, target_index)   # 对图像和对应的bboxes缩放到指定范围\n            images[i] = image\n            if targets is not None and target_index is not None:\n                targets[i] = target_index\n\n        # 记录resize后的图像尺寸\n        image_sizes = [img.shape[-2:] for img in images]\n        images = self.batch_images(images)  # 将images打包成一个batch\n        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])\n\n        for image_size in image_sizes:\n            assert len(image_size) == 2\n            image_sizes_list.append((image_size[0], image_size[1]))\n\n        image_list = ImageList(images, image_sizes_list)\n        return image_list, targets\n\n\ndef resize_boxes(boxes, original_size, new_size):\n    # type: (Tensor, List[int], List[int]) -> Tensor\n    \"\"\"\n    将boxes参数根据图像的缩放情况进行相应缩放\n\n    Arguments:\n        original_size: 图像缩放前的尺寸\n        new_size: 图像缩放后的尺寸\n    \"\"\"\n    ratios = [\n        torch.tensor(s, dtype=torch.float32, device=boxes.device) /\n        torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)\n        for s, s_orig in zip(new_size, original_size)\n   
 ]\n    ratios_height, ratios_width = ratios\n    # Removes a tensor dimension, boxes [minibatch, 4]\n    # Returns a tuple of all slices along a given dimension, already without it.\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    xmin = xmin * ratios_width\n    xmax = xmax * ratios_width\n    ymin = ymin * ratios_height\n    ymax = ymax * ratios_height\n    return torch.stack((xmin, ymin, xmax, ymax), dim=1)\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 1,\n    \"bicycle\": 2,\n    \"bird\": 3,\n    \"boat\": 4,\n    \"bottle\": 5,\n    \"bus\": 6,\n    \"car\": 7,\n    \"cat\": 8,\n    \"chair\": 9,\n    \"cow\": 10,\n    \"diningtable\": 11,\n    \"dog\": 12,\n    \"horse\": 13,\n    \"motorbike\": 14,\n    \"person\": 15,\n    \"pottedplant\": 16,\n    \"sheep\": 17,\n    \"sofa\": 18,\n    \"train\": 19,\n    \"tvmonitor\": 20\n}"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"step\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nimport torchvision\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nfrom torchvision import transforms\nfrom network_files import FasterRCNN, FastRCNNPredictor, AnchorsGenerator\nfrom backbone import resnet50_fpn_backbone, MobileNetV2\nfrom draw_box_utils import draw_objs\n\n\ndef create_model(num_classes):\n    # mobileNetv2+faster_RCNN\n    # backbone = MobileNetV2().features\n    # backbone.out_channels = 1280\n    #\n    # anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n    #                                     aspect_ratios=((0.5, 1.0, 2.0),))\n    #\n    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],\n    #                                                 output_size=[7, 7],\n    #                                                 sampling_ratio=2)\n    #\n    # model = FasterRCNN(backbone=backbone,\n    #                    num_classes=num_classes,\n    #                    rpn_anchor_generator=anchor_generator,\n    #                    box_roi_pool=roi_pooler)\n\n    # resNet50+fpn+faster_RCNN\n    # 注意，这里的norm_layer要和训练脚本中保持一致\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)\n    model = FasterRCNN(backbone=backbone, num_classes=num_classes, rpn_score_thresh=0.5)\n\n    return model\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = create_model(num_classes=21)\n\n    # load train weights\n    weights_path = \"./save_weights/model.pth\"\n    assert os.path.exists(weights_path), \"{} file does not exist.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" 
in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # read class_indict\n    label_json_path = './pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} does not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {str(v): str(k) for k, v in class_dict.items()}\n\n    # load image\n    original_img = Image.open(\"./test.jpg\")\n\n    # from pil image to tensor, do not normalize image\n    data_transform = transforms.Compose([transforms.ToTensor()])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        predictions = model(img.to(device))[0]\n        t_end = time_synchronized()\n        print(\"inference+NMS time: {}\".format(t_end - t_start))\n\n        predict_boxes = predictions[\"boxes\"].to(\"cpu\").numpy()\n        predict_classes = predictions[\"labels\"].to(\"cpu\").numpy()\n        predict_scores = predictions[\"scores\"].to(\"cpu\").numpy()\n\n        if len(predict_boxes) == 0:\n            print(\"没有检测到任何目标!\")\n\n        plot_img = draw_objs(original_img,\n                             predict_boxes,\n                             predict_classes,\n                             predict_scores,\n                             category_index=category_index,\n                             box_thresh=0.5,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == 
'__main__':\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/record_mAP.txt",
    "content": "COCO results:\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.526\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.804\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.586\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.403\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.580\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.454\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.639\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.646\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.540\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693\n\nmAP(IoU=0.5) for each category:\n aeroplane      : 0.8759546352558178\n bicycle        : 0.8554609242543677\n bird           : 0.8434943725365999\n boat           : 0.6753024837855667\n bottle         : 0.7185899054232459\n bus            : 0.8691082170432654\n car            : 0.8771002682431779\n cat            : 0.9169138943375639\n chair          : 0.6403466317122392\n cow            : 0.8285552434280278\n diningtable    : 0.6437938565684241\n dog            : 0.8745793980119227\n horse          : 0.8718238708874728\n motorbike      : 0.8910672301923952\n person         : 0.9047338725598096\n pottedplant    : 0.5808810399193133\n sheep          : 0.86045368568359\n sofa           : 0.7239390963388067\n train          : 0.8652277764020805\n tvmonitor      : 0.7683550206571649"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/requirements.txt",
    "content": "lxml\nmatplotlib\nnumpy\ntqdm\ntorch==1.7.1\ntorchvision==0.8.2\npycocotools\nPillow\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/split_data.py",
    "content": "import os\nimport random\n\n\ndef main():\n    random.seed(0)  # 设置随机种子，保证随机结果可复现\n\n    files_path = \"./VOCdevkit/VOC2012/Annotations\"\n    assert os.path.exists(files_path), \"path: '{}' does not exist.\".format(files_path)\n\n    val_rate = 0.5\n\n    files_name = sorted([file.split(\".\")[0] for file in os.listdir(files_path)])\n    files_num = len(files_name)\n    val_index = random.sample(range(0, files_num), k=int(files_num*val_rate))\n    train_files = []\n    val_files = []\n    for index, file_name in enumerate(files_name):\n        if index in val_index:\n            val_files.append(file_name)\n        else:\n            train_files.append(file_name)\n\n    try:\n        train_f = open(\"train.txt\", \"x\")\n        eval_f = open(\"val.txt\", \"x\")\n        train_f.write(\"\\n\".join(train_files))\n        eval_f.write(\"\\n\".join(val_files))\n    except FileExistsError as e:\n        print(e)\n        exit(1)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_mobilenetv2.py",
    "content": "import os\nimport datetime\n\nimport torch\nimport torchvision\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom backbone import MobileNetV2, vgg\nfrom my_dataset import VOCDataSet\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\n\n\ndef create_model(num_classes):\n    # https://download.pytorch.org/models/vgg16-397923af.pth\n    # 如果使用vgg16的话就下载对应预训练权重并取消下面注释，接着把mobilenetv2模型对应的两行代码注释掉\n    # vgg_feature = vgg(model_name=\"vgg16\", weights_path=\"./backbone/vgg16.pth\").features\n    # backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # 删除features中最后一个Maxpool层\n    # backbone.out_channels = 512\n\n    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth\n    backbone = MobileNetV2(weights_path=\"./backbone/mobilenet_v2.pth\").features\n    backbone.out_channels = 1280  # 设置对应backbone输出特征矩阵的channels\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行roi pooling\n                                                    output_size=[7, 7],   # roi_pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main():\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(\"save_weights\"):\n        
os.makedirs(\"save_weights\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = \"./\"  # VOCdevkit\n    aspect_ratio_group_factor = 3\n    batch_size = 8\n    amp = False  # 是否使用混合精度训练，需要GPU支持\n\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, batch_size)\n\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n      
                                                  batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_data_loader = torch.utils.data.DataLoader(val_dataset,\n                                                  batch_size=1,\n                                                  shuffle=False,\n                                                  pin_memory=True,\n                                                  num_workers=nw,\n                                                  collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=21)\n    # print(model)\n\n    model.to(device)\n\n    scaler = torch.cuda.amp.GradScaler() if amp else None\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n    #  first frozen backbone and train 5 epochs                   #\n    #  首先冻结前置特征提取网络权重（backbone），训练rpn以及最终预测网络部分 #\n    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n    for param in model.backbone.parameters():\n        param.requires_grad = False\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=0.005,\n                                momentum=0.9, weight_decay=0.0005)\n\n    init_epochs = 5\n    for epoch in range(init_epochs):\n        # train for one epoch, printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, 
train_data_loader,\n                                              device, epoch, print_freq=50,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n    torch.save(model.state_dict(), \"./save_weights/pretrain.pth\")\n\n    # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n    #  second unfrozen backbone and train all network     #\n    #  解冻前置特征提取网络权重（backbone），接着训练整个网络权重  #\n    # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n\n    # 冻结backbone部分底层权重\n    for name, parameter in model.backbone.named_parameters():\n        split_name = name.split(\".\")[0]\n        if split_name in [\"0\", \"1\", \"2\", \"3\"]:\n            parameter.requires_grad = False\n        else:\n            parameter.requires_grad = True\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=0.005,\n                                momentum=0.9, weight_decay=0.0005)\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=3,\n                                                   gamma=0.33)\n    num_epochs = 20\n    for epoch in range(init_epochs, num_epochs+init_epochs, 1):\n        # train for one epoch, printing every 50 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, 
train_data_loader,\n                                              device, epoch, print_freq=50,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        # 仅保存最后5个epoch的权重\n        if epoch in range(num_epochs+init_epochs)[-5:]:\n            save_files = {\n                'model': model.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': lr_scheduler.state_dict(),\n                'epoch': epoch}\n            torch.save(save_files, \"./save_weights/mobile-model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom my_dataset import VOCDataSet\nfrom backbone import resnet50_fpn_backbone\nfrom network_files import FasterRCNN, FastRCNNPredictor\nimport train_utils.train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir\n\n\ndef create_model(num_classes):\n    # 如果显存很小，建议使用默认的FrozenBatchNorm2d\n    # trainable_layers包括['layer4', 'layer3', 'layer2', 'layer1', 'conv1']， 5代表全部训练\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,\n                                     trainable_layers=3)\n    # 训练自己数据集时不要修改这里的91，修改的是传入的num_classes参数\n    model = FasterRCNN(backbone=backbone, num_classes=91)\n    # 载入预训练模型权重\n    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\n    weights_dict = torch.load(\"./backbone/fasterrcnn_resnet50_fpn_coco.pth\", map_location='cpu')\n    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    # get number of input features for the classifier\n    in_features = model.roi_heads.box_predictor.cls_score.in_features\n    # replace the pre-trained head with a new one\n    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # Data loading code\n    print(\"Loading data\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": 
transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    if args.aspect_ratio_group_factor >= 0:\n        # 统计所有图像比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n    else:\n        train_batch_sampler = torch.utils.data.BatchSampler(\n            train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    data_loader_test = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=args.num_classes + 1)\n    model.to(device)\n\n    if 
args.distributed and args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(\n        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        utils.evaluate(model, data_loader_test, device=device)\n        return\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,\n                    
                          device, epoch, args.print_freq,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        coco_info = utils.evaluate(model, data_loader_test, device=device)\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_files = {\n                'model': model_without_ddp.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': lr_scheduler.state_dict(),\n                'args': args,\n                'epoch': epoch}\n            if args.amp:\n                save_files[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_files,\n                           os.path.join(args.output_dir, f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    
parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 学习率，这个需要根据gpu的数量以及batch_size进行设置0.02 / 8 * num_GPU\n    parser.add_argument('--lr', default=0.02, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.StepLR的参数\n    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    
parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    parser.add_argument(\"--sync-bn\", dest=\"sync_bn\", help=\"Use sync batch norm\", type=bool, default=False)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_res50_fpn.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom network_files import FasterRCNN, FastRCNNPredictor\nfrom backbone import resnet50_fpn_backbone\nfrom my_dataset import VOCDataSet\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\n\n\ndef create_model(num_classes, load_pretrain_weights=True):\n    # 注意，这里的backbone默认使用的是FrozenBatchNorm2d，即不会去更新bn参数\n    # 目的是为了防止batch_size太小导致效果更差(如果显存很小，建议使用默认的FrozenBatchNorm2d)\n    # 如果GPU显存很大可以设置比较大的batch_size就可以将norm_layer设置为普通的BatchNorm2d\n    # trainable_layers包括['layer4', 'layer3', 'layer2', 'layer1', 'conv1']， 5代表全部训练\n    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth\n    backbone = resnet50_fpn_backbone(pretrain_path=\"./backbone/resnet50.pth\",\n                                     norm_layer=torch.nn.BatchNorm2d,\n                                     trainable_layers=3)\n    # 训练自己数据集时不要修改这里的91，修改的是传入的num_classes参数\n    model = FasterRCNN(backbone=backbone, num_classes=91)\n    \n    if load_pretrain_weights:\n        # 载入预训练模型权重\n        # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth\n        weights_dict = torch.load(\"./backbone/fasterrcnn_resnet50_fpn_coco.pth\", map_location='cpu')\n        missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n            print(\"unexpected_keys: \", unexpected_keys)\n\n    # get number of input features for the classifier\n    in_features = model.roi_heads.box_predictor.cls_score.in_features\n    # replace the pre-trained head with a new one\n    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    
print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        
collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,\n                                                      batch_size=1,\n                                                      shuffle=False,\n                                                      pin_memory=True,\n                                                      num_workers=nw,\n                                                      collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=args.num_classes + 1)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params,\n                                lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=3,\n                                                   gamma=0.33)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = 
torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50, warmup=True,\n                                              scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_set_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/resNetFpn-model-{}.pth\".format(epoch))\n\n 
   # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=15, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.01, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练的batch size\n    parser.add_argument('--batch_size', default=8, type=int, metavar='N',\n                        help='batch size when training.')\n    
parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/__init__.py",
    "content": "from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/coco_eval.py",
    "content": "import json\nfrom collections import defaultdict\n\nimport numpy as np\nimport copy\nimport torch\nimport torch._six\nfrom pycocotools.cocoeval import COCOeval\nfrom pycocotools.coco import COCO\nimport pycocotools.mask as mask_util\n\nfrom train_utils.distributed_utils import all_gather\n\n\nclass CocoEvaluator(object):\n    def __init__(self, coco_gt, iou_types):\n        assert isinstance(iou_types, (list, tuple))\n        coco_gt = copy.deepcopy(coco_gt)\n        self.coco_gt = coco_gt\n\n        self.iou_types = iou_types\n        self.coco_eval = {}\n        for iou_type in iou_types:\n            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)\n\n        self.img_ids = []\n        self.eval_imgs = {k: [] for k in iou_types}\n\n    def update(self, predictions):\n        img_ids = list(np.unique(list(predictions.keys())))\n        self.img_ids.extend(img_ids)\n\n        for iou_type in self.iou_types:\n            results = self.prepare(predictions, iou_type)\n            coco_dt = loadRes(self.coco_gt, results) if results else COCO()\n            coco_eval = self.coco_eval[iou_type]\n\n            coco_eval.cocoDt = coco_dt\n            coco_eval.params.imgIds = list(img_ids)\n            img_ids, eval_imgs = evaluate(coco_eval)\n\n            self.eval_imgs[iou_type].append(eval_imgs)\n\n    def synchronize_between_processes(self):\n        for iou_type in self.iou_types:\n            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)\n            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])\n\n    def accumulate(self):\n        for coco_eval in self.coco_eval.values():\n            coco_eval.accumulate()\n\n    def summarize(self):\n        for iou_type, coco_eval in self.coco_eval.items():\n            print(\"IoU metric: {}\".format(iou_type))\n            coco_eval.summarize()\n\n    def prepare(self, predictions, iou_type):\n        if iou_type == 
\"bbox\":\n            return self.prepare_for_coco_detection(predictions)\n        elif iou_type == \"segm\":\n            return self.prepare_for_coco_segmentation(predictions)\n        elif iou_type == \"keypoints\":\n            return self.prepare_for_coco_keypoint(predictions)\n        else:\n            raise ValueError(\"Unknown iou type {}\".format(iou_type))\n\n    def prepare_for_coco_detection(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"bbox\": box,\n                        \"score\": scores[k],\n                    }\n                    for k, box in enumerate(boxes)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_segmentation(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            scores = prediction[\"scores\"]\n            labels = prediction[\"labels\"]\n            masks = prediction[\"masks\"]\n\n            masks = masks > 0.5\n\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            rles = [\n                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                for mask in masks\n            ]\n            for rle in rles:\n                rle[\"counts\"] = rle[\"counts\"].decode(\"utf-8\")\n\n            
coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"segmentation\": rle,\n                        \"score\": scores[k],\n                    }\n                    for k, rle in enumerate(rles)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_keypoint(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n            keypoints = prediction[\"keypoints\"]\n            keypoints = keypoints.flatten(start_dim=1).tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        'keypoints': keypoint,\n                        \"score\": scores[k],\n                    }\n                    for k, keypoint in enumerate(keypoints)\n                ]\n            )\n        return coco_results\n\n\ndef convert_to_xywh(boxes):\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)\n\n\ndef merge(img_ids, eval_imgs):\n    all_img_ids = all_gather(img_ids)\n    all_eval_imgs = all_gather(eval_imgs)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_imgs = []\n    for p in all_eval_imgs:\n        merged_eval_imgs.append(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)\n\n    # keep only unique (and in sorted order) images\n    merged_img_ids, idx = 
np.unique(merged_img_ids, return_index=True)\n    merged_eval_imgs = merged_eval_imgs[..., idx]\n\n    return merged_img_ids, merged_eval_imgs\n\n\ndef create_common_coco_eval(coco_eval, img_ids, eval_imgs):\n    img_ids, eval_imgs = merge(img_ids, eval_imgs)\n    img_ids = list(img_ids)\n    eval_imgs = list(eval_imgs.flatten())\n\n    coco_eval.evalImgs = eval_imgs\n    coco_eval.params.imgIds = img_ids\n    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)\n\n\n#################################################################\n# From pycocotools, just removed the prints and fixed\n# a Python3 bug about unicode not defined\n#################################################################\n\n# Ideally, pycocotools wouldn't have hard-coded prints\n# so that we could avoid copy-pasting those two functions\n\ndef createIndex(self):\n    # create index\n    # print('creating index...')\n    anns, cats, imgs = {}, {}, {}\n    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)\n    if 'annotations' in self.dataset:\n        for ann in self.dataset['annotations']:\n            imgToAnns[ann['image_id']].append(ann)\n            anns[ann['id']] = ann\n\n    if 'images' in self.dataset:\n        for img in self.dataset['images']:\n            imgs[img['id']] = img\n\n    if 'categories' in self.dataset:\n        for cat in self.dataset['categories']:\n            cats[cat['id']] = cat\n\n    if 'annotations' in self.dataset and 'categories' in self.dataset:\n        for ann in self.dataset['annotations']:\n            catToImgs[ann['category_id']].append(ann['image_id'])\n\n    # print('index created!')\n\n    # create class members\n    self.anns = anns\n    self.imgToAnns = imgToAnns\n    self.catToImgs = catToImgs\n    self.imgs = imgs\n    self.cats = cats\n\n\nmaskUtils = mask_util\n\n\ndef loadRes(self, resFile):\n    \"\"\"\n    Load result file and return a result api object.\n    :param   resFile (str)     : file name of result file\n    :return: 
res (obj)         : result api object\n    \"\"\"\n    res = COCO()\n    res.dataset['images'] = [img for img in self.dataset['images']]\n\n    # print('Loading and preparing results...')\n    # tic = time.time()\n    if isinstance(resFile, torch._six.string_classes):\n        anns = json.load(open(resFile))\n    elif type(resFile) == np.ndarray:\n        anns = self.loadNumpyAnnotations(resFile)\n    else:\n        anns = resFile\n    assert type(anns) == list, 'results in not an array of objects'\n    annsImgIds = [ann['image_id'] for ann in anns]\n    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \\\n        'Results do not correspond to current coco set'\n    if 'caption' in anns[0]:\n        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])\n        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]\n        for id, ann in enumerate(anns):\n            ann['id'] = id + 1\n    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            bb = ann['bbox']\n            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]\n            if 'segmentation' not in ann:\n                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]\n            ann['area'] = bb[2] * bb[3]\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'segmentation' in anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            # now only support compressed RLE format as segmentation results\n            ann['area'] = maskUtils.area(ann['segmentation'])\n            if 'bbox' not in ann:\n                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'keypoints' in 
anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            s = ann['keypoints']\n            x = s[0::3]\n            y = s[1::3]\n            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)\n            ann['area'] = (x2 - x1) * (y2 - y1)\n            ann['id'] = id + 1\n            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    # print('DONE (t={:0.2f}s)'.format(time.time()- tic))\n\n    res.dataset['annotations'] = anns\n    createIndex(res)\n    return res\n\n\ndef evaluate(self):\n    '''\n    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs\n    :return: None\n    '''\n    # tic = time.time()\n    # print('Running per image evaluation...')\n    p = self.params\n    # add backward compatibility if useSegm is specified in params\n    if p.useSegm is not None:\n        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'\n        print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType))\n    # print('Evaluate annotation type *{}*'.format(p.iouType))\n    p.imgIds = list(np.unique(p.imgIds))\n    if p.useCats:\n        p.catIds = list(np.unique(p.catIds))\n    p.maxDets = sorted(p.maxDets)\n    self.params = p\n\n    self._prepare()\n    # loop through images, area range, max detection number\n    catIds = p.catIds if p.useCats else [-1]\n\n    if p.iouType == 'segm' or p.iouType == 'bbox':\n        computeIoU = self.computeIoU\n    elif p.iouType == 'keypoints':\n        computeIoU = self.computeOks\n    self.ious = {\n        (imgId, catId): computeIoU(imgId, catId)\n        for imgId in p.imgIds\n        for catId in catIds}\n\n    evaluateImg = self.evaluateImg\n    maxDet = p.maxDets[-1]\n    evalImgs = [\n        evaluateImg(imgId, catId, areaRng, maxDet)\n        for catId in catIds\n        for areaRng in p.areaRng\n        for imgId in p.imgIds\n    ]\n    # this is NOT in the pycocotools code, but could be done outside\n    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))\n    self._paramsEval = copy.deepcopy(self.params)\n    # toc = time.time()\n    # print('DONE (t={:0.2f}s).'.format(toc-tic))\n    return p.imgIds, evalImgs\n\n#################################################################\n# end of straight copy from pycocotools, just removing the prints\n#################################################################\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/coco_utils.py",
    "content": "import torch\nimport torchvision\nimport torch.utils.data\nfrom pycocotools.coco import COCO\n\n\ndef convert_to_coco_api(ds):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0\n    ann_id = 1\n    dataset = {'images': [], 'categories': [], 'annotations': []}\n    categories = set()\n    for img_idx in range(len(ds)):\n        # find better way to get target\n        hw, targets = ds.coco_index(img_idx)\n        image_id = targets[\"image_id\"].item()\n        img_dict = {}\n        img_dict['id'] = image_id\n        img_dict['height'] = hw[0]\n        img_dict['width'] = hw[1]\n        dataset['images'].append(img_dict)\n        bboxes = targets[\"boxes\"]\n        bboxes[:, 2:] -= bboxes[:, :2]\n        bboxes = bboxes.tolist()\n        labels = targets['labels'].tolist()\n        areas = targets['area'].tolist()\n        iscrowd = targets['iscrowd'].tolist()\n        num_objs = len(bboxes)\n        for i in range(num_objs):\n            ann = {}\n            ann['image_id'] = image_id\n            ann['bbox'] = bboxes[i]\n            ann['category_id'] = labels[i]\n            categories.add(labels[i])\n            ann['area'] = areas[i]\n            ann['iscrowd'] = iscrowd[i]\n            ann['id'] = ann_id\n            dataset['annotations'].append(ann)\n            ann_id += 1\n    dataset['categories'] = [{'id': i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    coco_ds.createIndex()\n    return coco_ds\n\n\ndef get_coco_api_from_dataset(dataset):\n    for _ in range(10):\n        if isinstance(dataset, torchvision.datasets.CocoDetection):\n            break\n        if isinstance(dataset, torch.utils.data.Subset):\n            dataset = dataset.dataset\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return dataset.coco\n    return convert_to_coco_api(dataset)\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    Run all_gather on arbitrary picklable data (not 
necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()\n    if world_size == 1:\n        return [data]\n\n    # serialized to a Tensor\n    buffer = pickle.dumps(data)\n    storage = torch.ByteStorage.from_buffer(buffer)\n    tensor = torch.ByteTensor(storage).to(\"cuda\")\n\n    # obtain Tensor size of each rank\n    local_size = torch.tensor([tensor.numel()], device=\"cuda\")\n    size_list = [torch.tensor([0], device=\"cuda\") for _ in range(world_size)]\n    dist.all_gather(size_list, local_size)\n    size_list = [int(size.item()) for size in size_list]\n    max_size = max(size_list)\n\n    # receiving Tensor from all ranks\n    # we pad the tensor because torch all_gather does not support\n    # gathering tensors of different shapes\n    tensor_list = []\n    for _ in size_list:\n        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=\"cuda\"))\n    if local_size != max_size:\n        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device=\"cuda\")\n        tensor = torch.cat((tensor, padding), dim=0)\n    dist.all_gather(tensor_list, tensor)\n\n    data_list = []\n    for size, tensor in zip(size_list, tensor_list):\n        buffer = tensor.cpu().numpy().tobytes()[:size]\n        data_list.append(pickle.loads(buffer))\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. 
Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n       
 end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         
eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef 
is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    # 使用torch1.9或以上时建议加上device_ids=[args.rank]\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k等份(2k+1个点，2k个区间)\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nfrom .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\nimport train_utils.distributed_utils as utils\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        images = list(image.to(device) for image in images)\n        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            loss_dict = model(images, targets)\n            losses = sum(loss for loss in loss_dict.values())\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict(loss_dict)\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n\n        loss_value = losses_reduced.item()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(loss_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            
scaler.scale(losses).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            losses.backward()\n            optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    coco = get_coco_api_from_dataset(data_loader.dataset)\n    iou_types = _get_iou_types(model)\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n\n    for image, targets in metric_logger.log_every(data_loader, 100, header):\n        image = list(img.to(device) for img in image)\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        outputs = model(image)\n\n        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n        model_time = time.time() - model_time\n\n        res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n\n        evaluator_time = time.time()\n        coco_evaluator.update(res)\n        evaluator_time = time.time() - evaluator_time\n        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list\n\n    
return coco_info\n\n\ndef _get_iou_types(model):\n    model_without_ddp = model\n    if isinstance(model, torch.nn.parallel.DistributedDataParallel):\n        model_without_ddp = model.module\n    iou_types = [\"bbox\"]\n    return iou_types\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/transforms.py",
    "content": "import random\nfrom torchvision.transforms import functional as F\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机水平翻转图像以及bboxes\"\"\"\n    def __init__(self, prob=0.5):\n        self.prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.prob:\n            height, width = image.shape[-2:]\n            image = image.flip(-1)  # 水平翻转图片\n            bbox = target[\"boxes\"]\n            # bbox: xmin, ymin, xmax, ymax\n            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            target[\"boxes\"] = bbox\n        return image, target\n"
  },
  {
    "path": "pytorch_object_detection/faster_rcnn/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nfrom tqdm import tqdm\nimport numpy as np\n\nimport transforms\nfrom network_files import FasterRCNN\nfrom backbone import resnet50_fpn_backbone\nfrom my_dataset import VOCDataSet\nfrom train_utils import get_coco_api_from_dataset, CocoEvaluator\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            mean_s = -1\n 
       else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    # read class_indict\n    label_json_path = './pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {v: k for k, v in 
class_dict.items()}\n\n    VOC_root = parser_data.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=1,\n                                                     shuffle=False,\n                                                     num_workers=nw,\n                                                     pin_memory=True,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    # 注意，这里的norm_layer要和训练脚本中保持一致\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)\n    model = FasterRCNN(backbone=backbone, num_classes=parser_data.num_classes + 1)\n\n    # 载入你自己训练好的模型权重\n    weights_path = parser_data.weights_path\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    # print(model)\n\n    model.to(device)\n\n    # evaluate on the test dataset\n    coco = get_coco_api_from_dataset(val_dataset)\n    iou_types = [\"bbox\"]\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n    cpu_device = torch.device(\"cpu\")\n\n    model.eval()\n    with torch.no_grad():\n        for image, targets in 
tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            image = list(img.to(device) for img in image)\n\n            # inference\n            outputs = model(image)\n\n            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n            res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n            coco_evaluator.update(res)\n\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_eval = coco_evaluator.coco_eval[\"bbox\"]\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(coco_eval)\n\n    # calculate voc info for every classes(IoU=0.5)\n    voc_map_info_list = []\n    for i in range(len(category_index)):\n        stats, _ = summarize(coco_eval, catId=i)\n        voc_map_info_list.append(\" {:15}: {}\".format(category_index[i + 1], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(\"record_mAP.txt\", \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n\n    # 检测目标类别数\n    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')\n\n    # 数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data/', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')\n\n    # batch size\n    
parser.add_argument('--batch_size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/README.md",
    "content": "# Mask R-CNN\n\n## 该项目参考自pytorch官方torchvision模块中的源码(使用pycocotools处略有不同)\n* https://github.com/pytorch/vision/tree/master/references/detection\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10或以上\n* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n* 详细环境配置见`requirements.txt`\n\n## 文件结构：\n```\n  ├── backbone: 特征提取网络\n  ├── network_files: Mask R-CNN网络\n  ├── train_utils: 训练验证相关模块（包括coco验证相关）\n  ├── my_dataset_coco.py: 自定义dataset用于读取COCO2017数据集\n  ├── my_dataset_voc.py: 自定义dataset用于读取Pascal VOC数据集\n  ├── train.py: 单GPU/CPU训练脚本\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测\n  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n  └── transforms.py: 数据预处理（随机水平翻转图像以及bboxes、将PIL图像转为Tensor）\n```\n\n## 预训练权重下载地址（下载后放入当前文件夹中）：\n* Resnet50预训练权重 https://download.pytorch.org/models/resnet50-0676ba61.pth (注意，下载预训练权重后要重命名，\n比如在train.py中读取的是`resnet50.pth`文件，不是`resnet50-0676ba61.pth`)\n* Mask R-CNN(Resnet50+FPN)预训练权重 https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth (注意，\n下载预训练权重后要重命名，比如在train.py中读取的是`maskrcnn_resnet50_fpn_coco.pth`文件，不是`maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth`)\n \n \n## 数据集，本例程使用的有COCO2017数据集和Pascal VOC2012数据集\n### COCO2017数据集\n* COCO官网地址：https://cocodataset.org/\n* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318\n* 这里以下载coco2017数据集为例，主要下载三个文件：\n    * `2017 Train images [118K/18GB]`：训练过程中使用到的所有图像文件\n    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件\n    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件\n* 都解压到`coco2017`文件夹下，可得到如下文件夹结构：\n```\n├── coco2017: 数据集根目录\n     ├── train2017: 所有训练图像文件夹(118287张)\n     ├── val2017: 所有验证图像文件夹(5000张)\n     └── annotations: 对应标注文件夹\n              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件\n              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件\n              ├── captions_train2017.json: 对应图像描述的训练集标注文件\n   
           ├── captions_val2017.json: 对应图像描述的验证集标注文件\n              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件\n              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件\n```\n\n### Pascal VOC2012数据集\n* 数据集下载地址： http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit\n* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/115787033\n* 解压后得到的文件夹结构如下：\n```\nVOCdevkit\n    └── VOC2012\n         ├── Annotations               所有的图像标注信息(XML文件)\n         ├── ImageSets\n         │   ├── Action                人的行为动作图像信息\n         │   ├── Layout                人的各个部位图像信息\n         │   │\n         │   ├── Main                  目标检测分类图像信息\n         │   │     ├── train.txt       训练集(5717)\n         │   │     ├── val.txt         验证集(5823)\n         │   │     └── trainval.txt    训练集+验证集(11540)\n         │   │\n         │   └── Segmentation          目标分割图像信息\n         │         ├── train.txt       训练集(1464)\n         │         ├── val.txt         验证集(1449)\n         │         └── trainval.txt    训练集+验证集(2913)\n         │\n         ├── JPEGImages                所有图像文件\n         ├── SegmentationClass         语义分割png图（基于类别）\n         └── SegmentationObject        实例分割png图（基于目标）\n```\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 确保设置好`--num-classes`和`--data-path`\n* 若要使用单GPU训练直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`\n\n## 注意事项\n1. 在使用训练脚本时，注意要将`--data-path`设置为自己存放数据集的**根目录**：\n```\n# 假设要使用COCO数据集，启用自定义数据集读取CocoDetection并将数据集解压到/data/coco2017目录下\npython train.py --data-path /data/coco2017\n\n# 假设要使用Pascal VOC数据集，启用自定义数据集读取VOCInstances并将数据集解压到/data/VOCdevkit目录下\npython train.py --data-path /data/VOCdevkit\n```\n\n2. 如果倍增`batch_size`，建议学习率也跟着倍增。假设将`batch_size`从4设置成8，那么学习率`lr`从0.004设置成0.008\n3. 
如果使用Batch Normalization模块时，`batch_size`不能小于4，否则效果会变差。**如果显存不够，batch_size必须小于4时**，建议在创建`resnet50_fpn_backbone`时，\n将`norm_layer`设置成`FrozenBatchNorm2d`或将`trainable_layers`设置成0(即冻结整个`backbone`)\n4. 训练过程中保存的`det_results.txt`(目标检测任务)以及`seg_results.txt`(实例分割任务)是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n5. 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。\n6. 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时需要修改`--num-classes`、`--data-path`、`--weights-path`以及\n`--label-json-path`（该参数是根据训练的数据集设置的）。其他代码尽量不要改动\n\n\n## 复现结果\n在COCO2017数据集上进行复现，训练过程中仅载入Resnet50的预训练权重，训练26个epochs。训练采用指令如下：\n```\ntorchrun --nproc_per_node=8 train_multi_GPU.py --batch-size 8 --lr 0.08 --pretrain False --amp True\n```\n\n训练得到权重下载地址： https://pan.baidu.com/s/1qpXUIsvnj8RHY-V05J-mnA  密码: 63d5\n\n在COCO2017验证集上的mAP(目标检测任务)：\n```\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.381\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.588\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.411\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.215\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.492\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.315\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.499\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.523\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.565\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.666\n```\n\n在COCO2017验证集上的mAP(实例分割任务)：\n```\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.340\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.552\n Average Precision  (AP) 
@[ IoU=0.75      | area=   all | maxDets=100 ] = 0.361\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.151\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.369\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.500\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.290\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.449\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.468\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.266\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.509\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.619\n```\n\n## 如果对Mask R-CNN原理不是很理解可参考我的bilibili\nhttps://www.bilibili.com/video/BV1ZY411774T\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/backbone/__init__.py",
    "content": "from .resnet50_fpn_model import resnet50_fpn_backbone\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/backbone/feature_pyramid_network.py",
    "content": "from collections import OrderedDict\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom torch.jit.annotations import Tuple, List, Dict\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n    Arguments:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model, return_layers):\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n        layers = OrderedDict()\n\n        # 遍历模型子模块按顺序存入有序字典\n        # 只保存layer4及其之前的结构，舍去之后不用的结构\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super().__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def 
forward(self, x):\n        out = OrderedDict()\n        # 依次遍历模型的所有子模块，并进行正向传播，\n        # 收集layer1, layer2, layer3, layer4的输出\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass BackboneWithFPN(nn.Module):\n    \"\"\"\n    Adds a FPN on top of a model.\n    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to\n    extract a submodel that returns the feature maps specified in return_layers.\n    The same limitations of IntermediatLayerGetter apply here.\n    Arguments:\n        backbone (nn.Module)\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n        in_channels_list (List[int]): number of channels for each feature map\n            that is returned, in the order they are present in the OrderedDict\n        out_channels (int): number of channels in the FPN.\n        extra_blocks: ExtraFPNBlock\n    Attributes:\n        out_channels (int): the number of channels in the FPN\n    \"\"\"\n\n    def __init__(self,\n                 backbone: nn.Module,\n                 return_layers=None,\n                 in_channels_list=None,\n                 out_channels=256,\n                 extra_blocks=None,\n                 re_getter=True):\n        super().__init__()\n\n        if extra_blocks is None:\n            extra_blocks = LastLevelMaxPool()\n\n        if re_getter:\n            assert return_layers is not None\n            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)\n        else:\n            self.body = backbone\n\n        self.fpn = FeaturePyramidNetwork(\n            
in_channels_list=in_channels_list,\n            out_channels=out_channels,\n            extra_blocks=extra_blocks,\n        )\n\n        self.out_channels = out_channels\n\n    def forward(self, x):\n        x = self.body(x)\n        x = self.fpn(x)\n        return x\n\n\nclass FeaturePyramidNetwork(nn.Module):\n    \"\"\"\n    Module that adds a FPN from on top of a set of feature maps. This is based on\n    `\"Feature Pyramid Network for Object Detection\" <https://arxiv.org/abs/1612.03144>`_.\n    The feature maps are currently supposed to be in increasing depth\n    order.\n    The input to the model is expected to be an OrderedDict[Tensor], containing\n    the feature maps on top of which the FPN will be added.\n    Arguments:\n        in_channels_list (list[int]): number of channels for each feature map that\n            is passed to the module\n        out_channels (int): number of channels of the FPN representation\n        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will\n            be performed. 
It is expected to take the fpn features, the original\n            features and the names of the original features as input, and returns\n            a new list of feature maps and their corresponding names\n    \"\"\"\n\n    def __init__(self, in_channels_list, out_channels, extra_blocks=None):\n        super().__init__()\n        # 用来调整resnet特征矩阵(layer1,2,3,4)的channel（kernel_size=1）\n        self.inner_blocks = nn.ModuleList()\n        # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵\n        self.layer_blocks = nn.ModuleList()\n        for in_channels in in_channels_list:\n            if in_channels == 0:\n                continue\n            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)\n            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)\n            self.inner_blocks.append(inner_block_module)\n            self.layer_blocks.append(layer_block_module)\n\n        # initialize parameters now to avoid modifying the initialization of top_blocks\n        for m in self.children():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_uniform_(m.weight, a=1)\n                nn.init.constant_(m.bias, 0)\n\n        self.extra_blocks = extra_blocks\n\n    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.inner_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.inner_blocks)\n        if idx < 0:\n            idx += num_blocks\n        i = 0\n        out = x\n        for module in self.inner_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.layer_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.layer_blocks)\n        if idx < 0:\n          
  idx += num_blocks\n        i = 0\n        out = x\n        for module in self.layer_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:\n        \"\"\"\n        Computes the FPN for a set of feature maps.\n        Arguments:\n            x (OrderedDict[Tensor]): feature maps for each feature level.\n        Returns:\n            results (OrderedDict[Tensor]): feature maps after FPN layers.\n                They are ordered from highest resolution first.\n        \"\"\"\n        # unpack OrderedDict into two lists for easier handling\n        names = list(x.keys())\n        x = list(x.values())\n\n        # 将resnet layer4的channel调整到指定的out_channels\n        # last_inner = self.inner_blocks[-1](x[-1])\n        last_inner = self.get_result_from_inner_blocks(x[-1], -1)\n        # result中保存着每个预测特征层\n        results = []\n        # 将layer4调整channel后的特征矩阵，通过3x3卷积后得到对应的预测特征矩阵\n        # results.append(self.layer_blocks[-1](last_inner))\n        results.append(self.get_result_from_layer_blocks(last_inner, -1))\n\n        for idx in range(len(x) - 2, -1, -1):\n            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)\n            feat_shape = inner_lateral.shape[-2:]\n            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode=\"nearest\")\n            last_inner = inner_lateral + inner_top_down\n            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))\n\n        # 在layer4对应的预测特征层基础上生成预测特征矩阵5\n        if self.extra_blocks is not None:\n            results, names = self.extra_blocks(results, x, names)\n\n        # make it back an OrderedDict\n        out = OrderedDict([(k, v) for k, v in zip(names, results)])\n\n        return out\n\n\nclass LastLevelMaxPool(torch.nn.Module):\n    \"\"\"\n    Applies a max_pool2d on top of the last feature map\n    \"\"\"\n\n    def forward(self, x: 
List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:\n        names.append(\"pool\")\n        x.append(F.max_pool2d(x[-1], 1, 2, 0))\n        return x, names\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/backbone/resnet50_fpn_model.py",
    "content": "import os\n\nimport torch\nimport torch.nn as nn\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nfrom .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = norm_layer(out_channel * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n  
      self._norm_layer = norm_layer\n\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = norm_layer(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        norm_layer = self._norm_layer\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                norm_layer(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample,\n                            stride=stride, norm_layer=norm_layer))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        
x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef overwrite_eps(model, eps):\n    \"\"\"\n    This method overwrites the default eps values of all the\n    FrozenBatchNorm2d layers of the model with the provided value.\n    This is necessary to address the BC-breaking change introduced\n    by the bug-fix at pytorch/vision#2933. The overwrite is applied\n    only when the pretrained weights are loaded to maintain compatibility\n    with previous versions.\n\n    Args:\n        model (nn.Module): The model on which we perform the overwrite.\n        eps (float): The new value of eps.\n    \"\"\"\n    for module in model.modules():\n        if isinstance(module, FrozenBatchNorm2d):\n            module.eps = eps\n\n\ndef resnet50_fpn_backbone(pretrain_path=\"\",\n                          norm_layer=nn.BatchNorm2d,\n                          trainable_layers=3,\n                          returned_layers=None,\n                          extra_blocks=None):\n    \"\"\"\n    搭建resnet50_fpn——backbone\n    Args:\n        pretrain_path: resnet50的预训练权重，如果不使用就默认为空\n        norm_layer: 默认是nn.BatchNorm2d，如果GPU显存很小，batch_size不能设置很大，\n                    建议将norm_layer设置成FrozenBatchNorm2d(默认是nn.BatchNorm2d)\n                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)\n        trainable_layers: 指定训练哪些层结构\n        returned_layers: 指定哪些层的输出需要返回\n        extra_blocks: 在输出的特征层基础上额外添加的层结构\n\n    Returns:\n\n    \"\"\"\n    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],\n                             include_top=False,\n                             norm_layer=norm_layer)\n\n    if isinstance(norm_layer, FrozenBatchNorm2d):\n        overwrite_eps(resnet_backbone, 0.0)\n\n    if pretrain_path != \"\":\n        assert os.path.exists(pretrain_path), 
\"{} is not exist.\".format(pretrain_path)\n        # 载入预训练权重\n        print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False))\n\n    # select layers that won't be frozen\n    assert 0 <= trainable_layers <= 5\n    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]\n\n    # 如果要训练所有层结构的话，不要忘了conv1后还有一个bn1\n    if trainable_layers == 5:\n        layers_to_train.append(\"bn1\")\n\n    # freeze layers\n    for name, parameter in resnet_backbone.named_parameters():\n        # 冻结不在layers_to_train列表中的层结构\n        if all([not name.startswith(layer) for layer in layers_to_train]):\n            parameter.requires_grad_(False)\n\n    if extra_blocks is None:\n        extra_blocks = LastLevelMaxPool()\n\n    if returned_layers is None:\n        returned_layers = [1, 2, 3, 4]\n    # 返回的特征层编号必须大于0且小于5\n    assert min(returned_layers) > 0 and max(returned_layers) < 5\n\n    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}\n    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}\n\n    # in_channel 为layer4的输出特征矩阵channel = 2048\n    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256\n    # 记录resnet50提供给fpn的每个特征层channel\n    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]\n    # 通过fpn后得到的每个特征层的channel\n    out_channels = 256\n    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/coco91_indices.json",
    "content": "{\n    \"1\": \"person\",\n    \"2\": \"bicycle\",\n    \"3\": \"car\",\n    \"4\": \"motorcycle\",\n    \"5\": \"airplane\",\n    \"6\": \"bus\",\n    \"7\": \"train\",\n    \"8\": \"truck\",\n    \"9\": \"boat\",\n    \"10\": \"traffic light\",\n    \"11\": \"fire hydrant\",\n    \"12\": \"N/A\",\n    \"13\": \"stop sign\",\n    \"14\": \"parking meter\",\n    \"15\": \"bench\",\n    \"16\": \"bird\",\n    \"17\": \"cat\",\n    \"18\": \"dog\",\n    \"19\": \"horse\",\n    \"20\": \"sheep\",\n    \"21\": \"cow\",\n    \"22\": \"elephant\",\n    \"23\": \"bear\",\n    \"24\": \"zebra\",\n    \"25\": \"giraffe\",\n    \"26\": \"N/A\",\n    \"27\": \"backpack\",\n    \"28\": \"umbrella\",\n    \"29\": \"N/A\",\n    \"30\": \"N/A\",\n    \"31\": \"handbag\",\n    \"32\": \"tie\",\n    \"33\": \"suitcase\",\n    \"34\": \"frisbee\",\n    \"35\": \"skis\",\n    \"36\": \"snowboard\",\n    \"37\": \"sports ball\",\n    \"38\": \"kite\",\n    \"39\": \"baseball bat\",\n    \"40\": \"baseball glove\",\n    \"41\": \"skateboard\",\n    \"42\": \"surfboard\",\n    \"43\": \"tennis racket\",\n    \"44\": \"bottle\",\n    \"45\": \"N/A\",\n    \"46\": \"wine glass\",\n    \"47\": \"cup\",\n    \"48\": \"fork\",\n    \"49\": \"knife\",\n    \"50\": \"spoon\",\n    \"51\": \"bowl\",\n    \"52\": \"banana\",\n    \"53\": \"apple\",\n    \"54\": \"sandwich\",\n    \"55\": \"orange\",\n    \"56\": \"broccoli\",\n    \"57\": \"carrot\",\n    \"58\": \"hot dog\",\n    \"59\": \"pizza\",\n    \"60\": \"donut\",\n    \"61\": \"cake\",\n    \"62\": \"chair\",\n    \"63\": \"couch\",\n    \"64\": \"potted plant\",\n    \"65\": \"bed\",\n    \"66\": \"N/A\",\n    \"67\": \"dining table\",\n    \"68\": \"N/A\",\n    \"69\": \"N/A\",\n    \"70\": \"toilet\",\n    \"71\": \"N/A\",\n    \"72\": \"tv\",\n    \"73\": \"laptop\",\n    \"74\": \"mouse\",\n    \"75\": \"remote\",\n    \"76\": \"keyboard\",\n    \"77\": \"cell phone\",\n    \"78\": \"microwave\",\n    \"79\": 
\"oven\",\n    \"80\": \"toaster\",\n    \"81\": \"sink\",\n    \"82\": \"refrigerator\",\n    \"83\": \"N/A\",\n    \"84\": \"book\",\n    \"85\": \"clock\",\n    \"86\": \"vase\",\n    \"87\": \"scissors\",\n    \"88\": \"teddy bear\",\n    \"89\": \"hair drier\",\n    \"90\": \"toothbrush\"\n}"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/det_results20220406-141544.txt",
    "content": "epoch:0 0.171  0.342  0.154  0.099  0.211  0.213  0.184  0.315  0.334  0.168  0.375  0.440  1.3826  0.08\nepoch:1 0.230  0.419  0.230  0.132  0.266  0.288  0.224  0.374  0.395  0.216  0.435  0.512  1.0356  0.08\nepoch:2 0.242  0.435  0.244  0.133  0.272  0.313  0.233  0.393  0.416  0.232  0.452  0.532  0.9718  0.08\nepoch:3 0.261  0.456  0.269  0.145  0.284  0.326  0.248  0.415  0.440  0.260  0.475  0.550  0.9363  0.08\nepoch:4 0.266  0.458  0.277  0.150  0.301  0.337  0.250  0.409  0.433  0.245  0.467  0.564  0.9145  0.08\nepoch:5 0.272  0.465  0.286  0.155  0.309  0.348  0.251  0.407  0.429  0.247  0.461  0.561  0.8982  0.08\nepoch:6 0.288  0.482  0.303  0.163  0.321  0.363  0.263  0.431  0.452  0.265  0.491  0.570  0.8859  0.08\nepoch:7 0.287  0.483  0.302  0.164  0.320  0.363  0.268  0.432  0.454  0.268  0.483  0.584  0.8771  0.08\nepoch:8 0.298  0.492  0.318  0.166  0.336  0.377  0.268  0.434  0.454  0.265  0.500  0.580  0.8685  0.08\nepoch:9 0.289  0.484  0.306  0.156  0.325  0.374  0.263  0.428  0.450  0.252  0.490  0.589  0.8612  0.08\nepoch:10 0.297  0.489  0.316  0.167  0.330  0.381  0.270  0.436  0.459  0.258  0.501  0.579  0.8547  0.08\nepoch:11 0.299  0.494  0.317  0.171  0.335  0.382  0.272  0.439  0.461  0.276  0.501  0.586  0.8498  0.08\nepoch:12 0.301  0.497  0.321  0.178  0.333  0.390  0.270  0.443  0.466  0.277  0.505  0.600  0.8461  0.08\nepoch:13 0.307  0.503  0.327  0.175  0.345  0.388  0.276  0.441  0.465  0.269  0.510  0.574  0.8409  0.08\nepoch:14 0.299  0.491  0.319  0.171  0.339  0.372  0.271  0.445  0.470  0.284  0.508  0.593  0.8355  0.08\nepoch:15 0.306  0.503  0.324  0.166  0.342  0.396  0.278  0.443  0.468  0.271  0.511  0.598  0.8330  0.08\nepoch:16 0.374  0.579  0.407  0.214  0.415  0.476  0.311  0.500  0.526  0.325  0.573  0.659  0.7421  0.008\nepoch:17 0.379  0.587  0.409  0.214  0.420  0.484  0.316  0.502  0.528  0.322  0.569  0.668  0.7157  0.008\nepoch:18 0.380  0.587  0.411  0.214  0.423  0.486  0.315  0.503  
0.528  0.323  0.571  0.669  0.7016  0.008\nepoch:19 0.381  0.588  0.413  0.216  0.422  0.490  0.317  0.508  0.532  0.332  0.574  0.676  0.6897  0.008\nepoch:20 0.379  0.586  0.410  0.212  0.418  0.488  0.313  0.499  0.523  0.317  0.566  0.667  0.6802  0.008\nepoch:21 0.378  0.587  0.408  0.210  0.418  0.488  0.314  0.496  0.520  0.314  0.560  0.667  0.6708  0.008\nepoch:22 0.381  0.588  0.411  0.213  0.420  0.495  0.316  0.500  0.524  0.318  0.567  0.673  0.6497  0.0008\nepoch:23 0.381  0.588  0.411  0.215  0.420  0.492  0.315  0.499  0.523  0.319  0.565  0.666  0.6447  0.0008\nepoch:24 0.381  0.588  0.412  0.214  0.419  0.495  0.316  0.499  0.523  0.317  0.565  0.669  0.6421  0.0008\nepoch:25 0.380  0.585  0.411  0.214  0.419  0.494  0.314  0.498  0.522  0.316  0.566  0.664  0.6398  0.0008\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = True):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/my_dataset_coco.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nimport torch.utils.data as data\nfrom pycocotools.coco import COCO\nfrom train_utils import coco_remove_images_without_annotations, convert_coco_poly_mask\n\n\nclass CocoDetection(data.Dataset):\n    \"\"\"`MS Coco Detection <https://cocodataset.org/>`_ Dataset.\n\n    Args:\n        root (string): Root directory where images are downloaded to.\n        dataset (string): train or val.\n        transforms (callable, optional): A function/transform that takes input sample and its target as entry\n            and returns a transformed version.\n    \"\"\"\n\n    def __init__(self, root, dataset=\"train\", transforms=None, years=\"2017\"):\n        super(CocoDetection, self).__init__()\n        assert dataset in [\"train\", \"val\"], 'dataset must be in [\"train\", \"val\"]'\n        anno_file = f\"instances_{dataset}{years}.json\"\n        assert os.path.exists(root), \"file '{}' does not exist.\".format(root)\n        self.img_root = os.path.join(root, f\"{dataset}{years}\")\n        assert os.path.exists(self.img_root), \"path '{}' does not exist.\".format(self.img_root)\n        self.anno_path = os.path.join(root, \"annotations\", anno_file)\n        assert os.path.exists(self.anno_path), \"file '{}' does not exist.\".format(self.anno_path)\n\n        self.mode = dataset\n        self.transforms = transforms\n        self.coco = COCO(self.anno_path)\n\n        # 获取coco数据索引与类别名称的关系\n        # 注意在object80中的索引并不是连续的，虽然只有80个类别，但索引还是按照stuff91来排序的\n        data_classes = dict([(v[\"id\"], v[\"name\"]) for k, v in self.coco.cats.items()])\n        max_index = max(data_classes.keys())  # 90\n        # 将缺失的类别名称设置成N/A\n        coco_classes = {}\n        for k in range(1, max_index + 1):\n            if k in data_classes:\n                coco_classes[k] = data_classes[k]\n            else:\n                coco_classes[k] = \"N/A\"\n\n        if dataset == \"train\":\n            json_str = 
json.dumps(coco_classes, indent=4)\n            with open(\"coco91_indices.json\", \"w\") as f:\n                f.write(json_str)\n\n        self.coco_classes = coco_classes\n\n        ids = list(sorted(self.coco.imgs.keys()))\n        if dataset == \"train\":\n            # 移除没有目标，或者目标面积非常小的数据\n            valid_ids = coco_remove_images_without_annotations(self.coco, ids)\n            self.ids = valid_ids\n        else:\n            self.ids = ids\n\n    def parse_targets(self,\n                      img_id: int,\n                      coco_targets: list,\n                      w: int = None,\n                      h: int = None):\n        assert w > 0\n        assert h > 0\n\n        # 只筛选出单个对象的情况\n        anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]\n\n        boxes = [obj[\"bbox\"] for obj in anno]\n\n        # guard against no boxes via resizing\n        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)\n        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]\n        boxes[:, 2:] += boxes[:, :2]\n        boxes[:, 0::2].clamp_(min=0, max=w)\n        boxes[:, 1::2].clamp_(min=0, max=h)\n\n        classes = [obj[\"category_id\"] for obj in anno]\n        classes = torch.tensor(classes, dtype=torch.int64)\n\n        area = torch.tensor([obj[\"area\"] for obj in anno])\n        iscrowd = torch.tensor([obj[\"iscrowd\"] for obj in anno])\n\n        segmentations = [obj[\"segmentation\"] for obj in anno]\n        masks = convert_coco_poly_mask(segmentations, h, w)\n\n        # 筛选出合法的目标，即x_max>x_min且y_max>y_min\n        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])\n        boxes = boxes[keep]\n        classes = classes[keep]\n        masks = masks[keep]\n        area = area[keep]\n        iscrowd = iscrowd[keep]\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = classes\n        target[\"masks\"] = masks\n        target[\"image_id\"] = torch.tensor([img_id])\n\n        # for 
conversion to coco api\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        return target\n\n    def __getitem__(self, index):\n        \"\"\"\n        Args:\n            index (int): Index\n\n        Returns:\n            tuple: Tuple (image, target). target is the dict built by ``parse_targets`` (boxes, labels, masks, image_id, area, iscrowd).\n        \"\"\"\n        coco = self.coco\n        img_id = self.ids[index]\n        ann_ids = coco.getAnnIds(imgIds=img_id)\n        coco_target = coco.loadAnns(ann_ids)\n\n        path = coco.loadImgs(img_id)[0]['file_name']\n        img = Image.open(os.path.join(self.img_root, path)).convert('RGB')\n\n        w, h = img.size\n        target = self.parse_targets(img_id, coco_target, w, h)\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.ids)\n\n    def get_height_and_width(self, index):\n        coco = self.coco\n        img_id = self.ids[index]\n\n        img_info = coco.loadImgs(img_id)[0]\n        w = img_info[\"width\"]\n        h = img_info[\"height\"]\n        return h, w\n\n    @staticmethod\n    def collate_fn(batch):\n        return tuple(zip(*batch))\n\n\nif __name__ == '__main__':\n    train = CocoDetection(\"/data/coco2017\", dataset=\"train\")\n    print(len(train))\n    t = train[0]\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/my_dataset_voc.py",
    "content": "import os\nimport json\n\nfrom lxml import etree\nimport numpy as np\nfrom PIL import Image\nimport torch\nfrom torch.utils.data import Dataset\nfrom train_utils import convert_to_coco_api\n\n\nclass VOCInstances(Dataset):\n    def __init__(self, voc_root, year=\"2012\", txt_name: str = \"train.txt\", transforms=None):\n        super().__init__()\n        if isinstance(year, int):\n            year = str(year)\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        if \"VOCdevkit\" in voc_root:\n            root = os.path.join(voc_root, f\"VOC{year}\")\n        else:\n            root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        assert os.path.exists(root), \"path '{}' does not exist.\".format(root)\n        image_dir = os.path.join(root, 'JPEGImages')\n        xml_dir = os.path.join(root, 'Annotations')\n        mask_dir = os.path.join(root, 'SegmentationObject')\n\n        txt_path = os.path.join(root, \"ImageSets\", \"Segmentation\", txt_name)\n        assert os.path.exists(txt_path), \"file '{}' does not exist.\".format(txt_path)\n        with open(os.path.join(txt_path), \"r\") as f:\n            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]\n\n        # read class_indict\n        json_file = 'pascal_voc_indices.json'\n        assert os.path.exists(json_file), \"{} file not exist.\".format(json_file)\n        with open(json_file, 'r') as f:\n            idx2classes = json.load(f)\n            self.class_dict = dict([(v, k) for k, v in idx2classes.items()])\n\n        self.images_path = []     # 存储图片路径\n        self.xmls_path = []       # 存储xml文件路径\n        self.xmls_info = []       # 存储解析的xml字典文件\n        self.masks_path = []      # 存储SegmentationObject图片路径\n        self.objects_bboxes = []  # 存储解析的目标boxes等信息\n        self.masks = []           # 存储读取的SegmentationObject图片信息\n\n        # 检查图片、xml文件以及mask是否都在\n        images_path = [os.path.join(image_dir, x + 
\".jpg\") for x in file_names]\n        xmls_path = [os.path.join(xml_dir, x + '.xml') for x in file_names]\n        masks_path = [os.path.join(mask_dir, x + \".png\") for x in file_names]\n        for idx, (img_path, xml_path, mask_path) in enumerate(zip(images_path, xmls_path, masks_path)):\n            assert os.path.exists(img_path), f\"not find {img_path}\"\n            assert os.path.exists(xml_path), f\"not find {xml_path}\"\n            assert os.path.exists(mask_path), f\"not find {mask_path}\"\n\n            # 解析xml中bbox信息\n            with open(xml_path) as fid:\n                xml_str = fid.read()\n            xml = etree.fromstring(xml_str)\n            obs_dict = parse_xml_to_dict(xml)[\"annotation\"]  # 将xml文件解析成字典\n            obs_bboxes = parse_objects(obs_dict, xml_path, self.class_dict, idx)  # 解析出目标信息\n            num_objs = obs_bboxes[\"boxes\"].shape[0]\n\n            # 读取SegmentationObject并检查是否和bboxes信息数量一致\n            instances_mask = Image.open(mask_path)\n            instances_mask = np.array(instances_mask)\n            instances_mask[instances_mask == 255] = 0  # 255为背景或者忽略掉的地方，这里为了方便直接设置为背景(0)\n\n            # 需要检查一下标注的bbox个数是否和instances个数一致\n            num_instances = instances_mask.max()\n            if num_objs != num_instances:\n                print(f\"warning: num_boxes:{num_objs} and num_instances:{num_instances} do not correspond. 
\"\n                      f\"skip image:{img_path}\")\n                continue\n\n            self.images_path.append(img_path)\n            self.xmls_path.append(xml_path)\n            self.xmls_info.append(obs_dict)\n            self.masks_path.append(mask_path)\n            self.objects_bboxes.append(obs_bboxes)\n            self.masks.append(instances_mask)\n\n        self.transforms = transforms\n        self.coco = convert_to_coco_api(self)\n\n    def parse_mask(self, idx: int):\n        mask = self.masks[idx]\n        c = mask.max()  # 有几个目标最大索引就等于几\n        masks = []\n        # 对每个目标的mask单独使用一个channel存放\n        for i in range(1, c+1):\n            masks.append(mask == i)\n        masks = np.stack(masks, axis=0)\n        return torch.as_tensor(masks, dtype=torch.uint8)\n\n    def __getitem__(self, idx):\n        \"\"\"\n        Args:\n            idx (int): Index\n\n        Returns:\n            tuple: (image, target) where target is the image segmentation.\n        \"\"\"\n        img = Image.open(self.images_path[idx]).convert('RGB')\n        target = self.objects_bboxes[idx]\n        masks = self.parse_mask(idx)\n        target[\"masks\"] = masks\n\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.images_path)\n\n    def get_height_and_width(self, idx):\n        \"\"\"方便统计所有图片的高宽比例信息\"\"\"\n        # read xml\n        data = self.xmls_info[idx]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        return data_height, data_width\n\n    def get_annotations(self, idx):\n        \"\"\"方便构建COCO()\"\"\"\n        data = self.xmls_info[idx]\n        h = int(data[\"size\"][\"height\"])\n        w = int(data[\"size\"][\"width\"])\n        target = self.objects_bboxes[idx]\n        masks = self.parse_mask(idx)\n        target[\"masks\"] = masks\n        return target, h, 
w\n\n    @staticmethod\n    def collate_fn(batch):\n        return tuple(zip(*batch))\n\n\ndef parse_xml_to_dict(xml):\n    \"\"\"\n    将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n    Args:\n        xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n    Returns:\n        Python dictionary holding XML contents.\n    \"\"\"\n\n    if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n        return {xml.tag: xml.text}\n\n    result = {}\n    for child in xml:\n        child_result = parse_xml_to_dict(child)  # 递归遍历标签信息\n        if child.tag != 'object':\n            result[child.tag] = child_result[child.tag]\n        else:\n            if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                result[child.tag] = []\n            result[child.tag].append(child_result[child.tag])\n    return {xml.tag: result}\n\n\ndef parse_objects(data: dict, xml_path: str, class_dict: dict, idx: int):\n    \"\"\"\n    解析出bboxes、labels、iscrowd以及ares等信息\n    Args:\n        data: 将xml解析成dict的Annotation数据\n        xml_path: 对应xml的文件路径\n        class_dict: 类别与索引对应关系\n        idx: 图片对应的索引\n\n    Returns:\n\n    \"\"\"\n    boxes = []\n    labels = []\n    iscrowd = []\n    assert \"object\" in data, \"{} lack of object information.\".format(xml_path)\n    for obj in data[\"object\"]:\n        xmin = float(obj[\"bndbox\"][\"xmin\"])\n        xmax = float(obj[\"bndbox\"][\"xmax\"])\n        ymin = float(obj[\"bndbox\"][\"ymin\"])\n        ymax = float(obj[\"bndbox\"][\"ymax\"])\n\n        # 进一步检查数据，有的标注信息中可能有w或h为0的情况，这样的数据会导致计算回归loss为nan\n        if xmax <= xmin or ymax <= ymin:\n            print(\"Warning: in '{}' xml, there are some bbox w/h <=0\".format(xml_path))\n            continue\n\n        boxes.append([xmin, ymin, xmax, ymax])\n        labels.append(int(class_dict[obj[\"name\"]]))\n        if \"difficult\" in obj:\n            iscrowd.append(int(obj[\"difficult\"]))\n        else:\n            iscrowd.append(0)\n\n    # convert everything into a 
torch.Tensor\n    boxes = torch.as_tensor(boxes, dtype=torch.float32)\n    labels = torch.as_tensor(labels, dtype=torch.int64)\n    iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n    image_id = torch.tensor([idx])\n    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n    return {\"boxes\": boxes,\n            \"labels\": labels,\n            \"iscrowd\": iscrowd,\n            \"image_id\": image_id,\n            \"area\": area}\n\n\nif __name__ == '__main__':\n    dataset = VOCInstances(voc_root=\"/data/\")\n    print(len(dataset))\n    d1 = dataset[0]\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/__init__.py",
    "content": "from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor\nfrom .rpn_function import AnchorsGenerator\nfrom .mask_rcnn import MaskRCNN\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/boxes.py",
    "content": "import torch\nfrom typing import Tuple\nfrom torch import Tensor\nimport torchvision\n\n\ndef nms(boxes, scores, iou_threshold):\n    # type: (Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression (NMS) on the boxes according\n    to their intersection-over-union (IoU).\n\n    NMS iteratively removes lower scoring boxes which have an\n    IoU greater than iou_threshold with another (higher scoring)\n    box.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4])\n        boxes to perform NMS on. They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    iou_threshold : float\n        discards all overlapping\n        boxes with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices\n        of the elements that have been kept\n        by NMS, sorted in decreasing order of scores\n    \"\"\"\n    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n\n\ndef batched_nms(boxes, scores, idxs, iou_threshold):\n    # type: (Tensor, Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression in a batched fashion.\n\n    Each index value correspond to a category, and NMS\n    will not be applied between elements of different categories.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes where NMS will be performed. 
They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    idxs : Tensor[N]\n        indices of the categories for each one of the boxes.\n    iou_threshold : float\n        discards all overlapping boxes\n        with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices of\n        the elements that have been kept by NMS, sorted\n        in decreasing order of scores\n    \"\"\"\n    if boxes.numel() == 0:\n        return torch.empty((0,), dtype=torch.int64, device=boxes.device)\n\n    # strategy: in order to perform NMS independently per class.\n    # we add an offset to all the boxes. The offset is dependent\n    # only on the class idx, and is large enough so that boxes\n    # from different classes do not overlap\n    # 获取所有boxes中最大的坐标值（xmin, ymin, xmax, ymax）\n    max_coordinate = boxes.max()\n\n    # to(): Performs Tensor dtype and/or device conversion\n    # 为每一个类别/每一层生成一个很大的偏移量\n    # 这里的to只是让生成tensor的dytpe和device与boxes保持一致\n    offsets = idxs.to(boxes) * (max_coordinate + 1)\n    # boxes加上对应层的偏移量后，保证不同类别/层之间boxes不会有重合的现象\n    boxes_for_nms = boxes + offsets[:, None]\n    keep = nms(boxes_for_nms, scores, iou_threshold)\n    return keep\n\n\ndef remove_small_boxes(boxes, min_size):\n    # type: (Tensor, float) -> Tensor\n    \"\"\"\n    Remove boxes which contains at least one side smaller than min_size.\n    移除宽高小于指定阈值的索引\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        min_size (float): minimum size\n\n    Returns:\n        keep (Tensor[K]): indices of the boxes that have both sides\n            larger than min_size\n    \"\"\"\n    ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # 预测boxes的宽和高\n    # keep = (ws >= min_size) & (hs >= min_size)  # 当满足宽，高都大于给定阈值时为True\n    keep = torch.logical_and(torch.ge(ws, min_size), torch.ge(hs, min_size))\n    # nonzero(): Returns a tensor 
containing the indices of all non-zero elements of input\n    # keep = keep.nonzero().squeeze(1)\n    keep = torch.where(keep)[0]\n    return keep\n\n\ndef clip_boxes_to_image(boxes, size):\n    # type: (Tensor, Tuple[int, int]) -> Tensor\n    \"\"\"\n    Clip boxes so that they lie inside an image of size `size`.\n    裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        size (Tuple[height, width]): size of the image\n\n    Returns:\n        clipped_boxes (Tensor[N, 4])\n    \"\"\"\n    dim = boxes.dim()\n    boxes_x = boxes[..., 0::2]  # x1, x2\n    boxes_y = boxes[..., 1::2]  # y1, y2\n    height, width = size\n\n    if torchvision._is_tracing():\n        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))\n    else:\n        boxes_x = boxes_x.clamp(min=0, max=width)   # 限制x坐标范围在[0,width]之间\n        boxes_y = boxes_y.clamp(min=0, max=height)  # 限制y坐标范围在[0,height]之间\n\n    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)\n    return clipped_boxes.reshape(boxes.shape)\n\n\ndef box_area(boxes):\n    \"\"\"\n    Computes the area of a set of bounding boxes, which are specified by its\n    (x1, y1, x2, y2) coordinates.\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes for which the area will be computed. 
They\n            are expected to be in (x1, y1, x2, y2) format\n\n    Returns:\n        area (Tensor[N]): area for each box\n    \"\"\"\n    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n\n\ndef box_iou(boxes1, boxes2):\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n\n    Arguments:\n        boxes1 (Tensor[N, 4])\n        boxes2 (Tensor[M, 4])\n\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n    area1 = box_area(boxes1)\n    area2 = box_area(boxes2)\n\n    #  When the shapes do not match,\n    #  the shape of the returned output tensor follows the broadcasting rules\n    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # left-top [N,M,2]\n    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # right-bottom [N,M,2]\n\n    wh = (rb - lt).clamp(min=0)  # [N,M,2]\n    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]\n\n    iou = inter / (area1[:, None] + area2 - inter)\n    return iou\n\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/det_utils.py",
    "content": "import torch\nimport math\nfrom typing import List, Tuple\nfrom torch import Tensor\n\n\nclass BalancedPositiveNegativeSampler(object):\n    \"\"\"\n    This class samples batches, ensuring that they contain a fixed proportion of positives\n    \"\"\"\n\n    def __init__(self, batch_size_per_image, positive_fraction):\n        # type: (int, float) -> None\n        \"\"\"\n        Arguments:\n            batch_size_per_image (int): number of elements to be selected per image\n            positive_fraction (float): percentage of positive elements per batch\n        \"\"\"\n        self.batch_size_per_image = batch_size_per_image\n        self.positive_fraction = positive_fraction\n\n    def __call__(self, matched_idxs):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        Arguments:\n            matched idxs: list of tensors containing -1, 0 or positive values.\n                Each tensor corresponds to a specific image.\n                -1 values are ignored, 0 are considered as negatives and > 0 as\n                positives.\n\n        Returns:\n            pos_idx (list[tensor])\n            neg_idx (list[tensor])\n\n        Returns two lists of binary masks for each image.\n        The first list contains the positive elements that were selected,\n        and the second list the negative example.\n        \"\"\"\n        pos_idx = []\n        neg_idx = []\n        # 遍历每张图像的matched_idxs\n        for matched_idxs_per_image in matched_idxs:\n            # >= 1的为正样本, nonzero返回非零元素索引\n            # positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)\n            positive = torch.where(torch.ge(matched_idxs_per_image, 1))[0]\n            # = 0的为负样本\n            # negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)\n            negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]\n\n            # 指定正样本的数量\n            num_pos = int(self.batch_size_per_image * 
self.positive_fraction)\n            # protect against not enough positive examples\n            # 如果正样本数量不够就直接采用所有正样本\n            num_pos = min(positive.numel(), num_pos)\n            # 指定负样本数量\n            num_neg = self.batch_size_per_image - num_pos\n            # protect against not enough negative examples\n            # 如果负样本数量不够就直接采用所有负样本\n            num_neg = min(negative.numel(), num_neg)\n\n            # randomly select positive and negative examples\n            # Returns a random permutation of integers from 0 to n - 1.\n            # 随机选择指定数量的正负样本\n            perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]\n            perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]\n\n            pos_idx_per_image = positive[perm1]\n            neg_idx_per_image = negative[perm2]\n\n            # create binary mask from indices\n            pos_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n            neg_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n\n            pos_idx_per_image_mask[pos_idx_per_image] = 1\n            neg_idx_per_image_mask[neg_idx_per_image] = 1\n\n            pos_idx.append(pos_idx_per_image_mask)\n            neg_idx.append(neg_idx_per_image_mask)\n\n        return pos_idx, neg_idx\n\n\n@torch.jit._script_if_tracing\ndef encode_boxes(reference_boxes, proposals, weights):\n    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n    \"\"\"\n    Encode a set of proposals with respect to some\n    reference boxes\n\n    Arguments:\n        reference_boxes (Tensor): reference boxes(gt)\n        proposals (Tensor): boxes to be encoded(anchors)\n        weights:\n    \"\"\"\n\n    # perform some unpacking to make it JIT-fusion friendly\n    wx = weights[0]\n    wy = weights[1]\n    ww = weights[2]\n    wh = weights[3]\n\n    # unsqueeze()\n    # 
Returns a new tensor with a dimension of size one inserted at the specified position.\n    proposals_x1 = proposals[:, 0].unsqueeze(1)\n    proposals_y1 = proposals[:, 1].unsqueeze(1)\n    proposals_x2 = proposals[:, 2].unsqueeze(1)\n    proposals_y2 = proposals[:, 3].unsqueeze(1)\n\n    reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)\n    reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)\n    reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)\n    reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)\n\n    # implementation starts here\n    # parse widths and heights\n    ex_widths = proposals_x2 - proposals_x1\n    ex_heights = proposals_y2 - proposals_y1\n    # parse coordinate of center point\n    ex_ctr_x = proposals_x1 + 0.5 * ex_widths\n    ex_ctr_y = proposals_y1 + 0.5 * ex_heights\n\n    gt_widths = reference_boxes_x2 - reference_boxes_x1\n    gt_heights = reference_boxes_y2 - reference_boxes_y1\n    gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths\n    gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights\n\n    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths\n    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights\n    targets_dw = ww * torch.log(gt_widths / ex_widths)\n    targets_dh = wh * torch.log(gt_heights / ex_heights)\n\n    targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)\n    return targets\n\n\nclass BoxCoder(object):\n    \"\"\"\n    This class encodes and decodes a set of bounding boxes into\n    the representation used for training the regressors.\n    \"\"\"\n\n    def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)):\n        # type: (Tuple[float, float, float, float], float) -> None\n        \"\"\"\n        Arguments:\n            weights (4-element tuple)\n            bbox_xform_clip (float)\n        \"\"\"\n        self.weights = weights\n        self.bbox_xform_clip = bbox_xform_clip\n\n    def encode(self, reference_boxes, proposals):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        结合anchors和与之对应的gt计算regression参数\n        Args:\n            reference_boxes: List[Tensor] 每个proposal/anchor对应的gt_boxes\n            proposals: List[Tensor] anchors/proposals\n\n        Returns: regression parameters\n\n        \"\"\"\n        # 统计每张图像的anchors个数，方便后面拼接在一起处理后在分开\n        # reference_boxes和proposal数据结构相同\n        boxes_per_image = [len(b) for b in reference_boxes]\n        reference_boxes = torch.cat(reference_boxes, dim=0)\n        proposals = torch.cat(proposals, dim=0)\n\n        # targets_dx, targets_dy, targets_dw, targets_dh\n        targets = self.encode_single(reference_boxes, proposals)\n        return targets.split(boxes_per_image, 0)\n\n    def encode_single(self, reference_boxes, proposals):\n        \"\"\"\n        Encode a set of proposals with respect to some\n        reference boxes\n\n        Arguments:\n            reference_boxes (Tensor): reference boxes\n            proposals (Tensor): boxes to be encoded\n        \"\"\"\n        dtype = reference_boxes.dtype\n        device = reference_boxes.device\n        weights = torch.as_tensor(self.weights, dtype=dtype, device=device)\n        targets = encode_boxes(reference_boxes, proposals, weights)\n\n        return targets\n\n    def decode(self, rel_codes, boxes):\n        # type: (Tensor, List[Tensor]) -> Tensor\n        \"\"\"\n\n        Args:\n            rel_codes: bbox regression parameters\n            boxes: anchors/proposals\n\n        Returns:\n\n        \"\"\"\n        assert isinstance(boxes, (list, tuple))\n        assert isinstance(rel_codes, 
torch.Tensor)\n        boxes_per_image = [b.size(0) for b in boxes]\n        concat_boxes = torch.cat(boxes, dim=0)\n\n        box_sum = 0\n        for val in boxes_per_image:\n            box_sum += val\n\n        # 将预测的bbox回归参数应用到对应anchors上得到预测bbox的坐标\n        pred_boxes = self.decode_single(\n            rel_codes, concat_boxes\n        )\n\n        # 防止pred_boxes为空时导致reshape报错\n        if box_sum > 0:\n            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)\n\n        return pred_boxes\n\n    def decode_single(self, rel_codes, boxes):\n        \"\"\"\n        From a set of original boxes and encoded relative box offsets,\n        get the decoded boxes.\n\n        Arguments:\n            rel_codes (Tensor): encoded boxes (bbox regression parameters)\n            boxes (Tensor): reference boxes (anchors/proposals)\n        \"\"\"\n        boxes = boxes.to(rel_codes.dtype)\n\n        # xmin, ymin, xmax, ymax\n        widths = boxes[:, 2] - boxes[:, 0]   # anchor/proposal宽度\n        heights = boxes[:, 3] - boxes[:, 1]  # anchor/proposal高度\n        ctr_x = boxes[:, 0] + 0.5 * widths   # anchor/proposal中心x坐标\n        ctr_y = boxes[:, 1] + 0.5 * heights  # anchor/proposal中心y坐标\n\n        wx, wy, ww, wh = self.weights  # RPN中为[1,1,1,1], fastrcnn中为[10,10,5,5]\n        dx = rel_codes[:, 0::4] / wx   # 预测anchors/proposals的中心坐标x回归参数\n        dy = rel_codes[:, 1::4] / wy   # 预测anchors/proposals的中心坐标y回归参数\n        dw = rel_codes[:, 2::4] / ww   # 预测anchors/proposals的宽度回归参数\n        dh = rel_codes[:, 3::4] / wh   # 预测anchors/proposals的高度回归参数\n\n        # limit max value, prevent sending too large values into torch.exp()\n        # self.bbox_xform_clip=math.log(1000. 
/ 16)   4.135\n        dw = torch.clamp(dw, max=self.bbox_xform_clip)\n        dh = torch.clamp(dh, max=self.bbox_xform_clip)\n\n        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]\n        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]\n        pred_w = torch.exp(dw) * widths[:, None]\n        pred_h = torch.exp(dh) * heights[:, None]\n\n        # xmin\n        pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymin\n        pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n        # xmax\n        pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymax\n        pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n\n        pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)\n        return pred_boxes\n\n\nclass Matcher(object):\n    BELOW_LOW_THRESHOLD = -1\n    BETWEEN_THRESHOLDS = -2\n\n    __annotations__ = {\n        'BELOW_LOW_THRESHOLD': int,\n        'BETWEEN_THRESHOLDS': int,\n    }\n\n    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):\n        # type: (float, float, bool) -> None\n        \"\"\"\n        Args:\n            high_threshold (float): quality values greater than or equal to\n                this value are candidate matches.\n            low_threshold (float): a lower quality threshold used to stratify\n                matches into three levels:\n                1) matches >= high_threshold\n                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)\n                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)\n            allow_low_quality_matches (bool): if True, produce additional matches\n                for predictions that have only low-quality match candidates. 
See\n                set_low_quality_matches_ for more details.\n        \"\"\"\n        self.BELOW_LOW_THRESHOLD = -1\n        self.BETWEEN_THRESHOLDS = -2\n        assert low_threshold <= high_threshold\n        self.high_threshold = high_threshold  # 0.7\n        self.low_threshold = low_threshold    # 0.3\n        self.allow_low_quality_matches = allow_low_quality_matches\n\n    def __call__(self, match_quality_matrix):\n        \"\"\"\n        计算anchors与每个gtboxes匹配的iou最大值，并记录索引，\n        iou<low_threshold索引值为-1， low_threshold<=iou<high_threshold索引值为-2\n        Args:\n            match_quality_matrix (Tensor[float]): an MxN tensor, containing the\n            pairwise quality between M ground-truth elements and N predicted elements.\n\n        Returns:\n            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in\n            [0, M - 1] or a negative value indicating that prediction i could not\n            be matched.\n        \"\"\"\n        if match_quality_matrix.numel() == 0:\n            # empty targets or proposals not supported during training\n            if match_quality_matrix.shape[0] == 0:\n                raise ValueError(\n                    \"No ground-truth boxes available for one of the images \"\n                    \"during training\")\n            else:\n                raise ValueError(\n                    \"No proposal boxes available for one of the images \"\n                    \"during training\")\n\n        # match_quality_matrix is M (gt) x N (predicted)\n        # Max over gt elements (dim 0) to find best gt candidate for each prediction\n        # M x N 的每一列代表一个anchors与所有gt的匹配iou值\n        # matched_vals代表每列的最大值，即每个anchors与所有gt匹配的最大iou值\n        # matches对应最大值所在的索引\n        matched_vals, matches = match_quality_matrix.max(dim=0)  # the dimension to reduce.\n        if self.allow_low_quality_matches:\n            all_matches = matches.clone()\n        else:\n            all_matches = None\n\n        # Assign 
candidate matches with low quality to negative (unassigned) values\n        # 计算iou小于low_threshold的索引\n        below_low_threshold = matched_vals < self.low_threshold\n        # 计算iou在low_threshold与high_threshold之间的索引值\n        between_thresholds = (matched_vals >= self.low_threshold) & (\n            matched_vals < self.high_threshold\n        )\n        # iou小于low_threshold的matches索引置为-1\n        matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD  # -1\n\n        # iou在[low_threshold, high_threshold]之间的matches索引置为-2\n        matches[between_thresholds] = self.BETWEEN_THRESHOLDS    # -2\n\n        if self.allow_low_quality_matches:\n            assert all_matches is not None\n            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)\n\n        return matches\n\n    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):\n        \"\"\"\n        Produce additional matches for predictions that have only low-quality matches.\n        Specifically, for each ground-truth find the set of predictions that have\n        maximum overlap with it (including ties); for each prediction in that set, if\n        it is unmatched, then match it to the ground-truth with which it has the highest\n        quality value.\n        \"\"\"\n        # For each gt, find the prediction with which it has highest quality\n        # 对于每个gt boxes寻找与其iou最大的anchor，\n        # highest_quality_foreach_gt为匹配到的最大iou值\n        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)  # the dimension to reduce.\n\n        # Find highest quality match available, even if it is low, including ties\n        # 寻找每个gt boxes与其iou最大的anchor索引，一个gt匹配到的最大iou可能有多个anchor\n        # gt_pred_pairs_of_highest_quality = torch.nonzero(\n        #     match_quality_matrix == highest_quality_foreach_gt[:, None]\n        # )\n        gt_pred_pairs_of_highest_quality = torch.where(\n            torch.eq(match_quality_matrix, highest_quality_foreach_gt[:, 
None])\n        )\n        # Example gt_pred_pairs_of_highest_quality:\n        #   tensor([[    0, 39796],\n        #           [    1, 32055],\n        #           [    1, 32070],\n        #           [    2, 39190],\n        #           [    2, 40255],\n        #           [    3, 40390],\n        #           [    3, 41455],\n        #           [    4, 45470],\n        #           [    5, 45325],\n        #           [    5, 46390]])\n        # Each row is a (gt index, prediction index)\n        # Note how gt items 1, 2, 3, and 5 each have two ties\n\n        # gt_pred_pairs_of_highest_quality[:, 0]代表是对应的gt index(不需要)\n        # pre_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]\n        pre_inds_to_update = gt_pred_pairs_of_highest_quality[1]\n        # 保留该anchor匹配gt最大iou的索引，即使iou低于设定的阈值\n        matches[pre_inds_to_update] = all_matches[pre_inds_to_update]\n\n\ndef smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):\n    \"\"\"\n    very similar to the smooth_l1_loss from pytorch, but with\n    the extra beta parameter\n    \"\"\"\n    n = torch.abs(input - target)\n    # cond = n < beta\n    cond = torch.lt(n, beta)\n    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)\n    if size_average:\n        return loss.mean()\n    return loss.sum()\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/faster_rcnn_framework.py",
    "content": "import warnings\nfrom collections import OrderedDict\nfrom typing import Tuple, List, Dict, Optional, Union\n\nimport torch\nfrom torch import nn, Tensor\nimport torch.nn.functional as F\nfrom torchvision.ops import MultiScaleRoIAlign\n\nfrom .roi_head import RoIHeads\nfrom .transform import GeneralizedRCNNTransform\nfrom .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork\n\n\nclass FasterRCNNBase(nn.Module):\n    \"\"\"\n    Main class for Generalized R-CNN.\n\n    Arguments:\n        backbone (nn.Module):\n        rpn (nn.Module):\n        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes\n            detections / masks from it.\n        transform (nn.Module): performs the data transformation from the inputs to feed into\n            the model\n    \"\"\"\n\n    def __init__(self, backbone, rpn, roi_heads, transform):\n        super(FasterRCNNBase, self).__init__()\n        self.transform = transform\n        self.backbone = backbone\n        self.rpn = rpn\n        self.roi_heads = roi_heads\n        # used only on torchscript mode\n        self._has_warned = False\n\n    @torch.jit.unused\n    def eager_outputs(self, losses, detections):\n        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        if self.training:\n            return losses\n\n        return detections\n\n    def forward(self, images, targets=None):\n        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        \"\"\"\n        Arguments:\n            images (list[Tensor]): images to be processed\n            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)\n\n        Returns:\n            result (list[BoxList] or dict[Tensor]): the output from the model.\n                During training, it returns a dict[Tensor] which contains the losses.\n                During 
testing, it returns list[BoxList] contains additional fields\n                like `scores`, `labels` and `mask` (for Mask R-CNN models).\n\n        \"\"\"\n        if self.training and targets is None:\n            raise ValueError(\"In training mode, targets should be passed\")\n\n        if self.training:\n            assert targets is not None\n            for target in targets:         # 进一步判断传入的target的boxes参数是否符合规定\n                boxes = target[\"boxes\"]\n                if isinstance(boxes, torch.Tensor):\n                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:\n                        raise ValueError(\"Expected target boxes to be a tensor\"\n                                         \"of shape [N, 4], got {:}.\".format(\n                                          boxes.shape))\n                else:\n                    raise ValueError(\"Expected target boxes to be of type \"\n                                     \"Tensor, got {:}.\".format(type(boxes)))\n\n        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])\n        for img in images:\n            val = img.shape[-2:]\n            assert len(val) == 2  # 防止输入的是个一维向量\n            original_image_sizes.append((val[0], val[1]))\n        # original_image_sizes = [img.shape[-2:] for img in images]\n\n        images, targets = self.transform(images, targets)  # 对图像进行预处理\n        # print(images.tensors.shape)\n        features = self.backbone(images.tensors)  # 将图像输入backbone得到特征图\n        if isinstance(features, torch.Tensor):  # 若只在一层特征层上预测，将feature放入有序字典中，并编号为‘0’\n            features = OrderedDict([('0', features)])  # 若在多层特征层上预测，传入的就是一个有序字典\n\n        # 将特征层以及标注target信息传入rpn中\n        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],\n        # 每个proposals是绝对坐标，且为(x1, y1, x2, y2)格式\n        proposals, proposal_losses = self.rpn(images, features, targets)\n\n        # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分\n        detections, detector_losses = 
self.roi_heads(features, proposals, images.image_sizes, targets)\n\n        # 对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)\n\n        losses = {}\n        losses.update(detector_losses)\n        losses.update(proposal_losses)\n\n        if torch.jit.is_scripting():\n            if not self._has_warned:\n                warnings.warn(\"RCNN always returns a (Losses, Detections) tuple in scripting\")\n                self._has_warned = True\n            return losses, detections\n        else:\n            return self.eager_outputs(losses, detections)\n\n        # if self.training:\n        #     return losses\n        #\n        # return detections\n\n\nclass TwoMLPHead(nn.Module):\n    \"\"\"\n    Standard heads for FPN-based models\n\n    Arguments:\n        in_channels (int): number of input channels\n        representation_size (int): size of the intermediate representation\n    \"\"\"\n\n    def __init__(self, in_channels, representation_size):\n        super(TwoMLPHead, self).__init__()\n\n        self.fc6 = nn.Linear(in_channels, representation_size)\n        self.fc7 = nn.Linear(representation_size, representation_size)\n\n    def forward(self, x):\n        x = x.flatten(start_dim=1)\n\n        x = F.relu(self.fc6(x))\n        x = F.relu(self.fc7(x))\n\n        return x\n\n\nclass FastRCNNPredictor(nn.Module):\n    \"\"\"\n    Standard classification + bounding box regression layers\n    for Fast R-CNN.\n\n    Arguments:\n        in_channels (int): number of input channels\n        num_classes (int): number of output classes (including background)\n    \"\"\"\n\n    def __init__(self, in_channels, num_classes):\n        super(FastRCNNPredictor, self).__init__()\n        self.cls_score = nn.Linear(in_channels, num_classes)\n        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)\n\n    def forward(self, x):\n        if x.dim() == 4:\n            assert 
list(x.shape[2:]) == [1, 1]\n        x = x.flatten(start_dim=1)\n        scores = self.cls_score(x)\n        bbox_deltas = self.bbox_pred(x)\n\n        return scores, bbox_deltas\n\n\nclass FasterRCNN(FasterRCNNBase):\n    \"\"\"\n    Implements Faster R-CNN.\n\n    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each\n    image, and should be in 0-1 range. Different images can have different sizes.\n\n    The behavior of the model changes depending if it is in training or evaluation mode.\n\n    During training, the model expects both the input tensors, as well as a targets (list of dictionary),\n    containing:\n        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values\n          between 0 and H and 0 and W\n        - labels (Int64Tensor[N]): the class label for each ground-truth box\n\n    The model returns a Dict[Tensor] during training, containing the classification and regression\n    losses for both the RPN and the R-CNN.\n\n    During inference, the model requires only the input tensors, and returns the post-processed\n    predictions as a List[Dict[Tensor]], one for each input image. 
The fields of the Dict are as\n    follows:\n        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between\n          0 and H and 0 and W\n        - labels (Int64Tensor[N]): the predicted labels for each image\n        - scores (Tensor[N]): the scores of each prediction\n\n    Arguments:\n        backbone (nn.Module): the network used to compute the features for the model.\n            It should contain an out_channels attribute, which indicates the number of output\n            channels that each feature map has (and it should be the same for all feature maps).\n            The backbone should return a single Tensor or an OrderedDict[Tensor].\n        num_classes (int): number of output classes of the model (including the background).\n            If box_predictor is specified, num_classes should be None.\n        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone\n        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone\n        image_mean (Tuple[float, float, float]): mean values used for input normalization.\n            They are generally the mean values of the dataset on which the backbone has been trained\n            on\n        image_std (Tuple[float, float, float]): std values used for input normalization.\n            They are generally the std values of the dataset on which the backbone has been trained on\n        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN\n        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training\n        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing\n        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during 
training\n        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing\n        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        rpn_score_thresh (float): during inference, only return proposals with a classification score\n            greater than rpn_score_thresh\n        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in\n            the locations indicated by the bounding boxes\n        box_head (nn.Module): module that takes the cropped feature maps as input\n        box_predictor (nn.Module): module that takes the output of box_head and returns the\n            classification logits and box regression deltas.\n        box_score_thresh (float): during inference, only return proposals with a classification score\n            greater than box_score_thresh\n        box_nms_thresh (float): NMS threshold for the prediction head. 
Used during inference\n        box_detections_per_img (int): maximum number of detections per image, for all classes.\n        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be\n            considered as positive during training of the classification head\n        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be\n            considered as negative during training of the classification head\n        box_batch_size_per_image (int): number of proposals that are sampled during training of the\n            classification head\n        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training\n            of the classification head\n        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the\n            bounding boxes\n\n    \"\"\"\n\n    def __init__(self, backbone, num_classes=None,\n                 # transform parameter\n                 min_size=800, max_size=1333,      # 预处理resize时限制的最小尺寸与最大尺寸\n                 image_mean=None, image_std=None,  # 预处理normalize时使用的均值和方差\n                 # RPN parameters\n                 rpn_anchor_generator=None, rpn_head=None,\n                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # rpn中在nms处理前保留的proposal数(根据score)\n                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # rpn中在nms处理后保留的proposal数\n                 rpn_nms_thresh=0.7,  # rpn中进行nms处理时使用的iou阈值\n                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # rpn计算损失时，采集正负样本设置的阈值\n                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # rpn计算损失时采样的样本数，以及正样本占总样本的比例\n                 rpn_score_thresh=0.0,\n                 # Box parameters\n                 box_roi_pool=None, box_head=None, box_predictor=None,\n                 # 移除低目标概率      fast rcnn中进行nms处理的阈值   对预测结果根据score排序取前100个目标\n                 box_score_thresh=0.05, 
box_nms_thresh=0.5, box_detections_per_img=100,\n                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # fast rcnn计算误差时，采集正负样本设置的阈值\n                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # fast rcnn计算误差时采样的样本数，以及正样本占所有样本的比例\n                 bbox_reg_weights=None):\n        if not hasattr(backbone, \"out_channels\"):\n            raise ValueError(\n                \"backbone should contain an attribute out_channels\"\n                \"specifying the number of output channels  (assumed to be the\"\n                \"same for all the levels\"\n            )\n\n        # assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))\n        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))\n\n        if num_classes is not None:\n            if box_predictor is not None:\n                raise ValueError(\"num_classes should be None when box_predictor \"\n                                 \"is specified\")\n        else:\n            if box_predictor is None:\n                raise ValueError(\"num_classes should not be None when box_predictor \"\n                                 \"is not specified\")\n\n        # 预测特征层的channels\n        out_channels = backbone.out_channels\n\n        # 若anchor生成器为空，则自动生成针对resnet50_fpn的anchor生成器\n        if rpn_anchor_generator is None:\n            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))\n            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n            rpn_anchor_generator = AnchorsGenerator(\n                anchor_sizes, aspect_ratios\n            )\n\n        # 生成RPN通过滑动窗口预测网络部分\n        if rpn_head is None:\n            rpn_head = RPNHead(\n                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]\n            )\n\n        # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,\n        # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000,\n        rpn_pre_nms_top_n = 
dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)\n        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)\n\n        # 定义整个RPN框架\n        rpn = RegionProposalNetwork(\n            rpn_anchor_generator, rpn_head,\n            rpn_fg_iou_thresh, rpn_bg_iou_thresh,\n            rpn_batch_size_per_image, rpn_positive_fraction,\n            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,\n            score_thresh=rpn_score_thresh)\n\n        #  Multi-scale RoIAlign pooling\n        if box_roi_pool is None:\n            box_roi_pool = MultiScaleRoIAlign(\n                featmap_names=['0', '1', '2', '3'],  # 在哪些特征层进行roi pooling\n                output_size=[7, 7],\n                sampling_ratio=2)\n\n        # fast RCNN中roi pooling后的展平处理两个全连接层部分\n        if box_head is None:\n            resolution = box_roi_pool.output_size[0]  # 默认等于7\n            representation_size = 1024\n            box_head = TwoMLPHead(\n                out_channels * resolution ** 2,\n                representation_size\n            )\n\n        # 在box_head的输出上预测部分\n        if box_predictor is None:\n            representation_size = 1024\n            box_predictor = FastRCNNPredictor(\n                representation_size,\n                num_classes)\n\n        # 将roi pooling, box_head以及box_predictor结合在一起\n        roi_heads = RoIHeads(\n            # box\n            box_roi_pool, box_head, box_predictor,\n            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5\n            box_batch_size_per_image, box_positive_fraction,  # 512  0.25\n            bbox_reg_weights,\n            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100\n\n        if image_mean is None:\n            image_mean = [0.485, 0.456, 0.406]\n        if image_std is None:\n            image_std = [0.229, 0.224, 0.225]\n\n        # 对数据进行标准化，缩放，打包成batch等处理部分\n        transform = GeneralizedRCNNTransform(min_size, max_size, 
image_mean, image_std)\n\n        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/image_list.py",
    "content": "from typing import List, Tuple\nfrom torch import Tensor\n\n\nclass ImageList(object):\n    \"\"\"\n    Structure that holds a list of images (of possibly\n    varying sizes) as a single tensor.\n    This works by padding the images to the same size,\n    and storing in a field the original sizes of each image\n    \"\"\"\n\n    def __init__(self, tensors, image_sizes):\n        # type: (Tensor, List[Tuple[int, int]]) -> None\n        \"\"\"\n        Arguments:\n            tensors (tensor) padding后的图像数据\n            image_sizes (list[tuple[int, int]])  padding前的图像尺寸\n        \"\"\"\n        self.tensors = tensors\n        self.image_sizes = image_sizes\n\n    def to(self, device):\n        # type: (Device) -> ImageList # noqa\n        cast_tensor = self.tensors.to(device)\n        return ImageList(cast_tensor, self.image_sizes)\n\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/mask_rcnn.py",
    "content": "from collections import OrderedDict\nimport torch.nn as nn\nfrom torchvision.ops import MultiScaleRoIAlign\n\nfrom .faster_rcnn_framework import FasterRCNN\n\n\nclass MaskRCNN(FasterRCNN):\n    \"\"\"\n        Implements Mask R-CNN.\n\n        The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each\n        image, and should be in 0-1 range. Different images can have different sizes.\n\n        The behavior of the model changes depending if it is in training or evaluation mode.\n\n        During training, the model expects both the input tensors, as well as a targets (list of dictionary),\n        containing:\n            - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with\n              ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.\n            - labels (Int64Tensor[N]): the class label for each ground-truth box\n            - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance\n\n        The model returns a Dict[Tensor] during training, containing the classification and regression\n        losses for both the RPN and the R-CNN, and the mask loss.\n\n        During inference, the model requires only the input tensors, and returns the post-processed\n        predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as\n        follows:\n            - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with\n              ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.\n            - labels (Int64Tensor[N]): the predicted labels for each image\n            - scores (Tensor[N]): the scores or each prediction\n            - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. 
In order to\n              obtain the final segmentation masks, the soft masks can be thresholded, generally\n              with a value of 0.5 (mask >= 0.5)\n\n        Args:\n            backbone (nn.Module): the network used to compute the features for the model.\n                It should contain an out_channels attribute, which indicates the number of output\n                channels that each feature map has (and it should be the same for all feature maps).\n                The backbone should return a single Tensor or an OrderedDict[Tensor].\n            num_classes (int): number of output classes of the model (including the background).\n                If box_predictor is specified, num_classes should be None.\n            min_size (int): minimum size of the image to be rescaled before feeding it to the backbone\n            max_size (int): maximum size of the image to be rescaled before feeding it to the backbone\n            image_mean (Tuple[float, float, float]): mean values used for input normalization.\n                They are generally the mean values of the dataset on which the backbone has been trained\n                on\n            image_std (Tuple[float, float, float]): std values used for input normalization.\n                They are generally the std values of the dataset on which the backbone has been trained on\n            rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n                maps.\n            rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN\n            rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training\n            rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing\n            rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training\n            rpn_post_nms_top_n_test (int): number of proposals to keep after 
applying NMS during testing\n            rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n            rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n                considered as positive during training of the RPN.\n            rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n                considered as negative during training of the RPN.\n            rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n                for computing the loss\n            rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training\n                of the RPN\n            rpn_score_thresh (float): during inference, only return proposals with a classification score\n                greater than rpn_score_thresh\n            box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in\n                the locations indicated by the bounding boxes\n            box_head (nn.Module): module that takes the cropped feature maps as input\n            box_predictor (nn.Module): module that takes the output of box_head and returns the\n                classification logits and box regression deltas.\n            box_score_thresh (float): during inference, only return proposals with a classification score\n                greater than box_score_thresh\n            box_nms_thresh (float): NMS threshold for the prediction head. 
Used during inference\n            box_detections_per_img (int): maximum number of detections per image, for all classes.\n            box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be\n                considered as positive during training of the classification head\n            box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be\n                considered as negative during training of the classification head\n            box_batch_size_per_image (int): number of proposals that are sampled during training of the\n                classification head\n            box_positive_fraction (float): proportion of positive proposals in a mini-batch during training\n                of the classification head\n            bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the\n                bounding boxes\n            mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in\n                 the locations indicated by the bounding boxes, which will be used for the mask head.\n            mask_head (nn.Module): module that takes the cropped feature maps as input\n            mask_predictor (nn.Module): module that takes the output of the mask_head and returns the\n                segmentation mask logits\n\n        \"\"\"\n\n    def __init__(\n            self,\n            backbone,\n            num_classes=None,\n            # transform parameters\n            min_size=800,\n            max_size=1333,\n            image_mean=None,\n            image_std=None,\n            # RPN parameters\n            rpn_anchor_generator=None,\n            rpn_head=None,\n            rpn_pre_nms_top_n_train=2000,\n            rpn_pre_nms_top_n_test=1000,\n            rpn_post_nms_top_n_train=2000,\n            rpn_post_nms_top_n_test=1000,\n            rpn_nms_thresh=0.7,\n            rpn_fg_iou_thresh=0.7,\n            
rpn_bg_iou_thresh=0.3,\n            rpn_batch_size_per_image=256,\n            rpn_positive_fraction=0.5,\n            rpn_score_thresh=0.0,\n            # Box parameters\n            box_roi_pool=None,\n            box_head=None,\n            box_predictor=None,\n            box_score_thresh=0.05,\n            box_nms_thresh=0.5,\n            box_detections_per_img=100,\n            box_fg_iou_thresh=0.5,\n            box_bg_iou_thresh=0.5,\n            box_batch_size_per_image=512,\n            box_positive_fraction=0.25,\n            bbox_reg_weights=None,\n            # Mask parameters\n            mask_roi_pool=None,\n            mask_head=None,\n            mask_predictor=None,\n    ):\n\n        if not isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None))):\n            raise TypeError(\n                f\"mask_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(mask_roi_pool)}\"\n            )\n\n        if num_classes is not None:\n            if mask_predictor is not None:\n                raise ValueError(\"num_classes should be None when mask_predictor is specified\")\n\n        out_channels = backbone.out_channels\n\n        if mask_roi_pool is None:\n            mask_roi_pool = MultiScaleRoIAlign(featmap_names=[\"0\", \"1\", \"2\", \"3\"], output_size=14, sampling_ratio=2)\n\n        if mask_head is None:\n            mask_layers = (256, 256, 256, 256)\n            mask_dilation = 1\n            mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)\n\n        if mask_predictor is None:\n            mask_predictor_in_channels = 256\n            mask_dim_reduced = 256\n            mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels, mask_dim_reduced, num_classes)\n\n        super().__init__(\n            backbone,\n            num_classes,\n            # transform parameters\n            min_size,\n            max_size,\n            image_mean,\n            image_std,\n            # RPN-specific 
parameters\n            rpn_anchor_generator,\n            rpn_head,\n            rpn_pre_nms_top_n_train,\n            rpn_pre_nms_top_n_test,\n            rpn_post_nms_top_n_train,\n            rpn_post_nms_top_n_test,\n            rpn_nms_thresh,\n            rpn_fg_iou_thresh,\n            rpn_bg_iou_thresh,\n            rpn_batch_size_per_image,\n            rpn_positive_fraction,\n            rpn_score_thresh,\n            # Box parameters\n            box_roi_pool,\n            box_head,\n            box_predictor,\n            box_score_thresh,\n            box_nms_thresh,\n            box_detections_per_img,\n            box_fg_iou_thresh,\n            box_bg_iou_thresh,\n            box_batch_size_per_image,\n            box_positive_fraction,\n            bbox_reg_weights,\n        )\n\n        self.roi_heads.mask_roi_pool = mask_roi_pool\n        self.roi_heads.mask_head = mask_head\n        self.roi_heads.mask_predictor = mask_predictor\n\n\nclass MaskRCNNHeads(nn.Sequential):\n    def __init__(self, in_channels, layers, dilation):\n        \"\"\"\n        Args:\n            in_channels (int): number of input channels\n            layers (tuple): feature dimensions of each FCN layer\n            dilation (int): dilation rate of kernel\n        \"\"\"\n        d = OrderedDict()\n        next_feature = in_channels\n\n        for layer_idx, layers_features in enumerate(layers, 1):\n            d[f\"mask_fcn{layer_idx}\"] = nn.Conv2d(next_feature,\n                                                  layers_features,\n                                                  kernel_size=3,\n                                                  stride=1,\n                                                  padding=dilation,\n                                                  dilation=dilation)\n            d[f\"relu{layer_idx}\"] = nn.ReLU(inplace=True)\n            next_feature = layers_features\n\n        super().__init__(d)\n        # initial params\n        for name, 
param in self.named_parameters():\n            if \"weight\" in name:\n                nn.init.kaiming_normal_(param, mode=\"fan_out\", nonlinearity=\"relu\")\n\n\nclass MaskRCNNPredictor(nn.Sequential):\n    def __init__(self, in_channels, dim_reduced, num_classes):\n        super().__init__(OrderedDict([\n            (\"conv5_mask\", nn.ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)),\n            (\"relu\", nn.ReLU(inplace=True)),\n            (\"mask_fcn_logits\", nn.Conv2d(dim_reduced, num_classes, 1, 1, 0))\n        ]))\n        # initial params\n        for name, param in self.named_parameters():\n            if \"weight\" in name:\n                nn.init.kaiming_normal_(param, mode=\"fan_out\", nonlinearity=\"relu\")\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/roi_head.py",
    "content": "from typing import Optional, List, Dict, Tuple\n\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\nfrom torchvision.ops import roi_align\n\nfrom . import det_utils\nfrom . import boxes as box_ops\n\n\ndef fastrcnn_loss(class_logits, box_regression, labels, regression_targets):\n    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    Computes the loss for Faster R-CNN.\n\n    Arguments:\n        class_logits : 预测类别概率信息，shape=[num_proposal, num_classes]\n        box_regression : 预测目标边界框回归信息\n        labels : 真实类别信息\n        regression_targets : 真实目标边界框信息\n\n    Returns:\n        classification_loss (Tensor)\n        box_loss (Tensor)\n    \"\"\"\n\n    labels = torch.cat(labels, dim=0)\n    regression_targets = torch.cat(regression_targets, dim=0)\n\n    # 计算类别损失信息\n    classification_loss = F.cross_entropy(class_logits, labels)\n\n    # get indices that correspond to the regression targets for\n    # the corresponding ground truth labels, to be used with\n    # advanced indexing\n    # 返回标签类别大于0的索引\n    # sampled_pos_inds_subset = torch.nonzero(torch.gt(labels, 0)).squeeze(1)\n    sampled_pos_inds_subset = torch.where(torch.gt(labels, 0))[0]\n\n    # 返回标签类别大于0位置的类别信息\n    labels_pos = labels[sampled_pos_inds_subset]\n\n    # shape=[num_proposal, num_classes]\n    N, num_classes = class_logits.shape\n    box_regression = box_regression.reshape(N, -1, 4)\n\n    # 计算边界框损失信息\n    box_loss = det_utils.smooth_l1_loss(\n        # 获取指定索引proposal的指定类别box信息\n        box_regression[sampled_pos_inds_subset, labels_pos],\n        regression_targets[sampled_pos_inds_subset],\n        beta=1 / 9,\n        size_average=False,\n    ) / labels.numel()\n\n    return classification_loss, box_loss\n\n\ndef maskrcnn_inference(x, labels):\n    # type: (Tensor, List[Tensor]) -> List[Tensor]\n    \"\"\"\n    From the results of the CNN, post process the masks\n    by taking the mask corresponding to the class 
with max\n    probability (which are of fixed size and directly output\n    by the CNN) and return the masks in the mask field of the BoxList.\n\n    Args:\n        x (Tensor): the mask logits\n        labels (list[BoxList]): bounding boxes that are used as\n            reference, one for each image\n\n    Returns:\n        results (list[BoxList]): one BoxList for each image, containing\n            the extra field mask\n    \"\"\"\n    # 将预测值通过sigmoid激活全部缩放到0~1之间\n    mask_prob = x.sigmoid()\n\n    # select masks corresponding to the predicted classes\n    num_masks = x.shape[0]\n    # 先记录每张图片中boxes/masks的个数\n    boxes_per_image = [label.shape[0] for label in labels]\n    # 在将所有图片中的masks信息拼接在一起(拼接后统一处理能够提升并行度)\n    labels = torch.cat(labels)\n    index = torch.arange(num_masks, device=labels.device)\n    # 提取每个masks中对应预测最终类别的mask\n    mask_prob = mask_prob[index, labels][:, None]\n    # 最后再按照每张图片中的masks个数分离开\n    mask_prob = mask_prob.split(boxes_per_image, dim=0)\n\n    return mask_prob\n\n\ndef project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):\n    # type: (Tensor, Tensor, Tensor, int) -> Tensor\n    \"\"\"\n    Given segmentation masks and the bounding boxes corresponding\n    to the location of the masks in the image, this function\n    crops and resizes the masks in the position defined by the\n    boxes. 
This prepares the masks for them to be fed to the\n    loss computation as the targets.\n    \"\"\"\n    matched_idxs = matched_idxs.to(boxes)\n    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)\n    gt_masks = gt_masks[:, None].to(rois)\n    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]\n\n\ndef maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):\n    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor\n    \"\"\"\n\n    Args:\n        mask_logits:\n        proposals:\n        gt_masks:\n        gt_labels:\n        mask_matched_idxs:\n\n    Returns:\n        mask_loss (Tensor): scalar tensor containing the loss\n    \"\"\"\n\n    # 28(FCN分支输出mask的大小)\n    discretization_size = mask_logits.shape[-1]\n    # 获取每个Proposal(全部为正样本)对应的gt类别\n    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]\n    # 根据Proposal信息在gt_masks上裁剪对应区域做为计算loss时的真正gt_mask\n    mask_targets = [\n        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)\n    ]\n\n    # 将一个batch中所有的Proposal对应信息拼接在一起(统一处理提高并行度)\n    labels = torch.cat(labels, dim=0)\n    mask_targets = torch.cat(mask_targets, dim=0)\n\n    # torch.mean (in binary_cross_entropy_with_logits) doesn't\n    # accept empty tensors, so handle it separately\n    if mask_targets.numel() == 0:\n        return mask_logits.sum() * 0\n\n    # 计算预测mask与真实gt_mask之间的BCELoss\n    mask_loss = F.binary_cross_entropy_with_logits(\n        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets\n    )\n    return mask_loss\n\n\nclass RoIHeads(torch.nn.Module):\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n    }\n\n    def __init__(self,\n                 box_roi_pool,   # Multi-scale RoIAlign pooling\n                 
box_head,       # TwoMLPHead\n                 box_predictor,  # FastRCNNPredictor\n                 # Faster R-CNN training\n                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5\n                 batch_size_per_image, positive_fraction,  # default: 512, 0.25\n                 bbox_reg_weights,  # None\n                 # Faster R-CNN inference\n                 score_thresh,        # default: 0.05\n                 nms_thresh,          # default: 0.5\n                 detection_per_img,   # default: 100\n                 # Mask\n                 mask_roi_pool=None,\n                 mask_head=None,\n                 mask_predictor=None,\n                 ):\n        super(RoIHeads, self).__init__()\n\n        self.box_similarity = box_ops.box_iou\n        # assign ground-truth boxes for each proposal\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # default: 0.5\n            bg_iou_thresh,  # default: 0.5\n            allow_low_quality_matches=False)\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image,  # default: 512\n            positive_fraction)     # default: 0.25\n\n        if bbox_reg_weights is None:\n            bbox_reg_weights = (10., 10., 5., 5.)\n        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)\n\n        self.box_roi_pool = box_roi_pool    # Multi-scale RoIAlign pooling\n        self.box_head = box_head            # TwoMLPHead\n        self.box_predictor = box_predictor  # FastRCNNPredictor\n\n        self.score_thresh = score_thresh  # default: 0.05\n        self.nms_thresh = nms_thresh      # default: 0.5\n        self.detection_per_img = detection_per_img  # default: 100\n\n        self.mask_roi_pool = mask_roi_pool\n        self.mask_head = mask_head\n        self.mask_predictor = mask_predictor\n\n    def has_mask(self):\n        if self.mask_roi_pool is None:\n            return False\n        if self.mask_head is None:\n        
    return False\n        if self.mask_predictor is None:\n            return False\n        return True\n\n    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):\n        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        为每个proposal匹配对应的gt_box，并划分到正负样本中\n        Args:\n            proposals:\n            gt_boxes:\n            gt_labels:\n\n        Returns:\n\n        \"\"\"\n        matched_idxs = []\n        labels = []\n        # 遍历每张图像的proposals, gt_boxes, gt_labels信息\n        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):\n            if gt_boxes_in_image.numel() == 0:  # 该张图像中没有gt框，为背景\n                # background image\n                device = proposals_in_image.device\n                clamped_matched_idxs_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n                labels_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n            else:\n                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                # 计算proposal与每个gt_box的iou重合度\n                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)\n\n                # 计算proposal与每个gt_box匹配的iou最大值，并记录索引，\n                # iou < low_threshold索引值为 -1， low_threshold <= iou < high_threshold索引值为 -2\n                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)\n\n                # 限制最小值，防止匹配标签时出现越界的情况\n                # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别（实际上并不是）,后续会进一步处理\n                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)\n                # 获取proposal匹配到的gt对应标签\n                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]\n                
labels_in_image = labels_in_image.to(dtype=torch.int64)\n\n                # label background (below the low threshold)\n                # 将gt索引为-1的类别设置为0，即背景，负样本\n                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_in_image[bg_inds] = 0\n\n                # label ignore proposals (between low and high threshold)\n                # 将gt索引为-2的类别设置为-1, 即废弃样本\n                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler\n\n            matched_idxs.append(clamped_matched_idxs_in_image)\n            labels.append(labels_in_image)\n        return matched_idxs, labels\n\n    def subsample(self, labels):\n        # type: (List[Tensor]) -> List[Tensor]\n        # BalancedPositiveNegativeSampler\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        sampled_inds = []\n        # 遍历每张图片的正负样本索引\n        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):\n            # 记录所有采集样本索引（包括正样本和负样本）\n            # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)\n            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]\n            sampled_inds.append(img_sampled_inds)\n        return sampled_inds\n\n    def add_gt_proposals(self, proposals, gt_boxes):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        将gt_boxes拼接到proposal后面\n        Args:\n            proposals: 一个batch中每张图像rpn预测的boxes\n            gt_boxes:  一个batch中每张图像对应的真实目标边界框\n\n        Returns:\n\n        \"\"\"\n        proposals = [\n            torch.cat((proposal, gt_box))\n            for proposal, gt_box in zip(proposals, gt_boxes)\n        ]\n        return proposals\n\n    def check_targets(self, targets):\n        # type: (Optional[List[Dict[str, Tensor]]]) -> None\n        assert 
targets is not None\n        assert all([\"boxes\" in t for t in targets])\n        assert all([\"labels\" in t for t in targets])\n\n    def select_training_samples(self,\n                                proposals,  # type: List[Tensor]\n                                targets     # type: Optional[List[Dict[str, Tensor]]]\n                                ):\n        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        划分正负样本，统计对应gt的标签以及边界框回归信息\n        list元素个数为batch_size\n        Args:\n            proposals: rpn预测的boxes\n            targets:\n\n        Returns:\n\n        \"\"\"\n\n        # 检查target数据是否为空\n        self.check_targets(targets)\n        if targets is None:\n            raise ValueError(\"target should not be None.\")\n\n        dtype = proposals[0].dtype\n        device = proposals[0].device\n\n        # 获取标注好的boxes以及labels信息\n        gt_boxes = [t[\"boxes\"].to(dtype) for t in targets]\n        gt_labels = [t[\"labels\"] for t in targets]\n\n        # append ground-truth bboxes to proposal\n        # 将gt_boxes拼接到proposal后面\n        proposals = self.add_gt_proposals(proposals, gt_boxes)\n\n        # get matching gt indices for each proposal\n        # 为每个proposal匹配对应的gt_box，并划分到正负样本中\n        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)\n        # sample a fixed proportion of positive-negative proposals\n        # 按给定数量和比例采样正负样本\n        sampled_inds = self.subsample(labels)\n        matched_gt_boxes = []\n        num_images = len(proposals)\n\n        # 遍历每张图像\n        for img_id in range(num_images):\n            # 获取每张图像的正负样本索引\n            img_sampled_inds = sampled_inds[img_id]\n            # 获取对应正负样本的proposals信息\n            proposals[img_id] = proposals[img_id][img_sampled_inds]\n            # 获取对应正负样本的真实类别信息\n            labels[img_id] = labels[img_id][img_sampled_inds]\n            # 获取对应正负样本的gt索引信息\n            matched_idxs[img_id] = 
matched_idxs[img_id][img_sampled_inds]\n\n            gt_boxes_in_image = gt_boxes[img_id]\n            if gt_boxes_in_image.numel() == 0:\n                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)\n            # 获取对应正负样本的gt box信息\n            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])\n\n        # 根据gt和proposal计算边框回归参数（针对gt的）\n        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)\n        return proposals, matched_idxs, labels, regression_targets\n\n    def postprocess_detections(self,\n                               class_logits,    # type: Tensor\n                               box_regression,  # type: Tensor\n                               proposals,       # type: List[Tensor]\n                               image_shapes     # type: List[Tuple[int, int]]\n                               ):\n        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        对网络的预测数据进行后处理，包括\n        （1）根据proposal以及预测的回归参数计算出最终bbox坐标\n        （2）对预测类别结果进行softmax处理\n        （3）裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n        （4）移除所有背景信息\n        （5）移除低概率目标\n        （6）移除小尺寸目标\n        （7）执行nms处理，并按scores进行排序\n        （8）根据scores排序返回前topk个目标\n        Args:\n            class_logits: 网络预测类别概率信息\n            box_regression: 网络预测的边界框回归参数\n            proposals: rpn输出的proposal\n            image_shapes: 打包成batch前每张图像的宽高\n\n        Returns:\n\n        \"\"\"\n        device = class_logits.device\n        # 预测目标类别数\n        num_classes = class_logits.shape[-1]\n\n        # 获取每张图像的预测bbox数量\n        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]\n        # 根据proposal以及预测的回归参数计算出最终bbox坐标\n        pred_boxes = self.box_coder.decode(box_regression, proposals)\n\n        # 对预测类别结果进行softmax处理\n        pred_scores = F.softmax(class_logits, -1)\n\n        # split boxes and scores per image\n        # 根据每张图像的预测bbox数量分割结果\n        pred_boxes_list = 
pred_boxes.split(boxes_per_image, 0)\n        pred_scores_list = pred_scores.split(boxes_per_image, 0)\n\n        all_boxes = []\n        all_scores = []\n        all_labels = []\n        # 遍历每张图像预测信息\n        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):\n            # 裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)\n\n            # create labels for each prediction\n            labels = torch.arange(num_classes, device=device)\n            labels = labels.view(1, -1).expand_as(scores)\n\n            # remove prediction with the background label\n            # 移除索引为0的所有信息（0代表背景）\n            boxes = boxes[:, 1:]\n            scores = scores[:, 1:]\n            labels = labels[:, 1:]\n\n            # batch everything, by making every class prediction be a separate instance\n            boxes = boxes.reshape(-1, 4)\n            scores = scores.reshape(-1)\n            labels = labels.reshape(-1)\n\n            # remove low scoring boxes\n            # 移除低概率目标，self.score_thresh=0.05\n            # gt: Computes input > other element-wise.\n            # inds = torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1)\n            inds = torch.where(torch.gt(scores, self.score_thresh))[0]\n            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]\n\n            # remove empty boxes\n            # 移除小目标\n            keep = box_ops.remove_small_boxes(boxes, min_size=1.)\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n            # non-maximum suppression, independently done per class\n            # 执行nms处理，执行后的结果会按照scores从大到小进行排序返回\n            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            # 获取scores排在前topk个预测目标\n            keep = keep[:self.detection_per_img]\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n    
        all_boxes.append(boxes)\n            all_scores.append(scores)\n            all_labels.append(labels)\n\n        return all_boxes, all_scores, all_labels\n\n    def forward(self,\n                features,       # type: Dict[str, Tensor]\n                proposals,      # type: List[Tensor]\n                image_shapes,   # type: List[Tuple[int, int]]\n                targets=None    # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            features (List[Tensor])\n            proposals (List[Tensor[N, 4]])\n            image_shapes (List[Tuple[H, W]])\n            targets (List[Dict])\n        \"\"\"\n\n        # 检查targets的数据类型是否正确\n        if targets is not None:\n            for t in targets:\n                floating_point_types = (torch.float, torch.double, torch.half)\n                assert t[\"boxes\"].dtype in floating_point_types, \"target boxes must of float type\"\n                assert t[\"labels\"].dtype == torch.int64, \"target labels must of int64 type\"\n\n        if self.training:\n            # 划分正负样本，统计对应gt的标签以及边界框回归信息\n            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)\n        else:\n            labels = None\n            regression_targets = None\n            matched_idxs = None\n\n        # 将采集样本通过Multi-scale RoIAlign pooling层\n        # box_features_shape: [num_proposals, channel, height, width]\n        box_features = self.box_roi_pool(features, proposals, image_shapes)\n\n        # 通过roi_pooling后的两层全连接层\n        # box_features_shape: [num_proposals, representation_size]\n        box_features = self.box_head(box_features)\n\n        # 接着分别预测目标类别和边界框回归参数\n        class_logits, box_regression = self.box_predictor(box_features)\n\n        result: List[Dict[str, torch.Tensor]] = []\n        losses = {}\n        if self.training:\n            
assert labels is not None and regression_targets is not None\n            loss_classifier, loss_box_reg = fastrcnn_loss(\n                class_logits, box_regression, labels, regression_targets)\n            losses = {\n                \"loss_classifier\": loss_classifier,\n                \"loss_box_reg\": loss_box_reg\n            }\n        else:\n            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)\n            num_images = len(boxes)\n            for i in range(num_images):\n                result.append(\n                    {\n                        \"boxes\": boxes[i],\n                        \"labels\": labels[i],\n                        \"scores\": scores[i],\n                    }\n                )\n\n        if self.has_mask():\n            mask_proposals = [p[\"boxes\"] for p in result]  # 将最终预测的Boxes信息取出\n            if self.training:\n                # matched_idxs为每个proposal在正负样本匹配过程中得到的gt索引(背景的gt索引也默认设置成了0)\n                if matched_idxs is None:\n                    raise ValueError(\"if in training, matched_idxs should not be None\")\n\n                # during training, only focus on positive boxes\n                num_images = len(proposals)\n                mask_proposals = []\n                pos_matched_idxs = []\n                for img_id in range(num_images):\n                    pos = torch.where(labels[img_id] > 0)[0]  # 寻找对应gt类别大于0，即正样本\n                    mask_proposals.append(proposals[img_id][pos])\n                    pos_matched_idxs.append(matched_idxs[img_id][pos])\n            else:\n                pos_matched_idxs = None\n\n            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)\n            mask_features = self.mask_head(mask_features)\n            mask_logits = self.mask_predictor(mask_features)\n\n            loss_mask = {}\n            if self.training:\n                if targets is None or pos_matched_idxs is None or 
mask_logits is None:\n                    raise ValueError(\"targets, pos_matched_idxs, mask_logits cannot be None when training\")\n\n                gt_masks = [t[\"masks\"] for t in targets]\n                gt_labels = [t[\"labels\"] for t in targets]\n                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)\n                loss_mask = {\"loss_mask\": rcnn_loss_mask}\n            else:\n                labels = [r[\"labels\"] for r in result]\n                mask_probs = maskrcnn_inference(mask_logits, labels)\n                for mask_prob, r in zip(mask_probs, result):\n                    r[\"masks\"] = mask_prob\n\n            losses.update(loss_mask)\n\n        return result, losses\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/rpn_function.py",
    "content": "from typing import List, Optional, Dict, Tuple\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nimport torchvision\n\nfrom . import det_utils\nfrom . import boxes as box_ops\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):\n    # type: (Tensor, int) -> Tuple[int, int]\n    from torch.onnx import operators\n    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)\n    pre_nms_top_n = torch.min(torch.cat(\n        (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),\n         num_anchors), 0))\n\n    return num_anchors, pre_nms_top_n\n\n\nclass AnchorsGenerator(nn.Module):\n    __annotations__ = {\n        \"cell_anchors\": Optional[List[torch.Tensor]],\n        \"_cache\": Dict[str, List[torch.Tensor]]\n    }\n\n    \"\"\"\n    anchors生成器\n    Module that generates anchors for a set of feature maps and\n    image sizes.\n\n    The module supports computing anchors at multiple sizes and aspect ratios\n    per feature map.\n\n    sizes and aspect_ratios should have the same number of elements, and it should\n    correspond to the number of feature maps.\n\n    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,\n    and AnchorsGenerator will output a set of sizes[i] * aspect_ratios[i] anchors\n    per spatial location for feature map i.\n\n    Arguments:\n        sizes (Tuple[Tuple[int]]):\n        aspect_ratios (Tuple[Tuple[float]]):\n    \"\"\"\n\n    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):\n        super(AnchorsGenerator, self).__init__()\n\n        if not isinstance(sizes[0], (list, tuple)):\n            # TODO change this\n            sizes = tuple((s,) for s in sizes)\n        if not isinstance(aspect_ratios[0], (list, tuple)):\n            aspect_ratios = (aspect_ratios,) * len(sizes)\n\n        assert len(sizes) == len(aspect_ratios)\n\n        self.sizes = sizes\n 
       self.aspect_ratios = aspect_ratios\n        self.cell_anchors = None\n        self._cache = {}\n\n    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device(\"cpu\")):\n        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor\n        \"\"\"\n        compute anchor sizes\n        Arguments:\n            scales: sqrt(anchor_area)\n            aspect_ratios: h/w ratios\n            dtype: float32\n            device: cpu/gpu\n        \"\"\"\n        scales = torch.as_tensor(scales, dtype=dtype, device=device)\n        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)\n        h_ratios = torch.sqrt(aspect_ratios)\n        w_ratios = 1.0 / h_ratios\n\n        # [r1, r2, r3]' * [s1, s2, s3]\n        # number of elements is len(ratios)*len(scales)\n        ws = (w_ratios[:, None] * scales[None, :]).view(-1)\n        hs = (h_ratios[:, None] * scales[None, :]).view(-1)\n\n        # left-top, right-bottom coordinate relative to anchor center(0, 0)\n        # 生成的anchors模板都是以（0, 0）为中心的, shape [len(ratios)*len(scales), 4]\n        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2\n\n        return base_anchors.round()  # round 四舍五入\n\n    def set_cell_anchors(self, dtype, device):\n        # type: (torch.dtype, torch.device) -> None\n        if self.cell_anchors is not None:\n            cell_anchors = self.cell_anchors\n            assert cell_anchors is not None\n            # suppose that all anchors have the same device\n            # which is a valid assumption in the current state of the codebase\n            if cell_anchors[0].device == device:\n                return\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        # anchors模板都是以(0, 0)为中心的anchor\n        cell_anchors = [\n            self.generate_anchors(sizes, aspect_ratios, dtype, device)\n            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)\n        ]\n        self.cell_anchors = 
cell_anchors\n\n    def num_anchors_per_location(self):\n        # 计算每个预测特征层上每个滑动窗口的预测目标数\n        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]\n\n    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),\n    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.\n    def grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"\n        anchors position in grid coordinate axis map into origin image\n        计算预测特征图对应原始图像上的所有anchors的坐标\n        Args:\n            grid_sizes: 预测特征矩阵的height和width\n            strides: 预测特征矩阵上一步对应原始图像上的步距\n        \"\"\"\n        anchors = []\n        cell_anchors = self.cell_anchors\n        assert cell_anchors is not None\n\n        # 遍历每个预测特征层的grid_size，strides和cell_anchors\n        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):\n            grid_height, grid_width = size\n            stride_height, stride_width = stride\n            device = base_anchors.device\n\n            # For output anchor, compute [x_center, y_center, x_center, y_center]\n            # shape: [grid_width] 对应原图上的x坐标(列)\n            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width\n            # shape: [grid_height] 对应原图上的y坐标(行)\n            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height\n\n            # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量)\n            # torch.meshgrid函数分别传入行坐标和列坐标，生成网格行坐标矩阵和网格列坐标矩阵\n            # shape: [grid_height, grid_width]\n            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n            shift_x = shift_x.reshape(-1)\n            shift_y = shift_y.reshape(-1)\n\n            # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量\n            # shape: [grid_width*grid_height, 4]\n            shifts = torch.stack([shift_x, shift_y, 
shift_x, shift_y], dim=1)\n\n            # For every (base anchor, output anchor) pair,\n            # offset each zero-centered base anchor by the center of the output anchor.\n            # 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制)\n            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)\n            anchors.append(shifts_anchor.reshape(-1, 4))\n\n        return anchors  # List[Tensor(all_num_anchors, 4)]\n\n    def cached_grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"将计算得到的所有anchors信息进行缓存\"\"\"\n        key = str(grid_sizes) + str(strides)\n        # self._cache是字典类型\n        if key in self._cache:\n            return self._cache[key]\n        anchors = self.grid_anchors(grid_sizes, strides)\n        self._cache[key] = anchors\n        return anchors\n\n    def forward(self, image_list, feature_maps):\n        # type: (ImageList, List[Tensor]) -> List[Tensor]\n        # 获取每个预测特征层的尺寸(height, width)\n        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])\n\n        # 获取输入图像的height和width\n        image_size = image_list.tensors.shape[-2:]\n\n        # 获取变量类型和设备类型\n        dtype, device = feature_maps[0].dtype, feature_maps[0].device\n\n        # one step in feature map equate n pixel stride in origin image\n        # 计算特征层上的一步等于原始图像上的步长\n        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),\n                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        self.set_cell_anchors(dtype, device)\n\n        # 计算/读取所有anchors的坐标信息（这里的anchors信息是映射到原图上的所有anchors信息，不是anchors模板）\n        # 得到的是一个list列表，对应每张预测特征图映射回原图的anchors坐标信息\n        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)\n\n        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])\n     
   # 遍历一个batch中的每张图像\n        for i, (image_height, image_width) in enumerate(image_list.image_sizes):\n            anchors_in_image = []\n            # 遍历每张预测特征图映射回原图的anchors坐标信息\n            for anchors_per_feature_map in anchors_over_all_feature_maps:\n                anchors_in_image.append(anchors_per_feature_map)\n            anchors.append(anchors_in_image)\n        # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起\n        # anchors是个list，每个元素为一张图像的所有anchors信息\n        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]\n        # Clear the cache in case that memory leaks.\n        self._cache.clear()\n        return anchors\n\n\nclass RPNHead(nn.Module):\n    \"\"\"\n    add a RPN head with classification and regression\n    通过滑动窗口计算预测目标概率与bbox regression参数\n\n    Arguments:\n        in_channels: number of channels of the input feature\n        num_anchors: number of anchors to be predicted\n    \"\"\"\n\n    def __init__(self, in_channels, num_anchors):\n        super(RPNHead, self).__init__()\n        # 3x3 滑动窗口\n        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)\n        # 计算预测的目标分数（这里的目标只是指前景或者背景）\n        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)\n        # 计算预测的目标bbox regression参数\n        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)\n\n        for layer in self.children():\n            if isinstance(layer, nn.Conv2d):\n                torch.nn.init.normal_(layer.weight, std=0.01)\n                torch.nn.init.constant_(layer.bias, 0)\n\n    def forward(self, x):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        logits = []\n        bbox_reg = []\n        for i, feature in enumerate(x):\n            t = F.relu(self.conv(feature))\n            logits.append(self.cls_logits(t))\n            bbox_reg.append(self.bbox_pred(t))\n        return logits, bbox_reg\n\n\ndef permute_and_flatten(layer, N, A, C, H, 
W):\n    # type: (Tensor, int, int, int, int, int) -> Tensor\n    \"\"\"\n    调整tensor顺序，并进行reshape\n    Args:\n        layer: 预测特征层上预测的目标概率或bboxes regression参数\n        N: batch_size\n        A: anchors_num_per_position\n        C: classes_num or 4(bbox coordinate)\n        H: height\n        W: width\n\n    Returns:\n        layer: 调整tensor顺序，并reshape后的结果[N, -1, C]\n    \"\"\"\n    # view和reshape功能是一样的，先展平所有元素再按照给定shape排列\n    # view函数只能用于内存中连续存储的tensor，permute等操作会使tensor在内存中变得不再连续，此时就不能再调用view函数\n    # reshape则不需要依赖目标tensor是否在内存中是连续的\n    # [batch_size, anchors_num_per_position * (C or 4), height, width]\n    layer = layer.view(N, -1, C,  H, W)\n    # 调换tensor维度\n    layer = layer.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]\n    layer = layer.reshape(N, -1, C)\n    return layer\n\n\ndef concat_box_prediction_layers(box_cls, box_regression):\n    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    对box_cls和box_regression两个list中的每个预测特征层的预测信息\n    的tensor排列顺序以及shape进行调整 -> [N, -1, C]\n    Args:\n        box_cls: 每个预测特征层上的预测目标概率\n        box_regression: 每个预测特征层上的预测目标bboxes regression参数\n\n    Returns:\n\n    \"\"\"\n    box_cls_flattened = []\n    box_regression_flattened = []\n\n    # 遍历每个预测特征层\n    for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):\n        # [batch_size, anchors_num_per_position * classes_num, height, width]\n        # 注意，当计算RPN中的proposal时，classes_num=1,只区分目标和背景\n        N, AxC, H, W = box_cls_per_level.shape\n        # # [batch_size, anchors_num_per_position * 4, height, width]\n        Ax4 = box_regression_per_level.shape[1]\n        # anchors_num_per_position\n        A = Ax4 // 4\n        # classes_num\n        C = AxC // A\n\n        # [N, -1, C]\n        box_cls_per_level = permute_and_flatten(box_cls_per_level, N, A, C, H, W)\n        box_cls_flattened.append(box_cls_per_level)\n\n        # [N, -1, C]\n        box_regression_per_level = permute_and_flatten(box_regression_per_level, N, 
A, 4, H, W)\n        box_regression_flattened.append(box_regression_per_level)\n\n    box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2)  # start_dim, end_dim\n    box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4)\n    return box_cls, box_regression\n\n\nclass RegionProposalNetwork(torch.nn.Module):\n    \"\"\"\n    Implements Region Proposal Network (RPN).\n\n    Arguments:\n        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        head (nn.Module): module that computes the objectness and regression deltas\n        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. 
It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n\n    \"\"\"\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n        'pre_nms_top_n': Dict[str, int],\n        'post_nms_top_n': Dict[str, int],\n    }\n\n    def __init__(self, anchor_generator, head,\n                 fg_iou_thresh, bg_iou_thresh,\n                 batch_size_per_image, positive_fraction,\n                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):\n        super(RegionProposalNetwork, self).__init__()\n        self.anchor_generator = anchor_generator\n        self.head = head\n        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))\n\n        # use during training\n        # 计算anchors与真实bbox的iou\n        self.box_similarity = box_ops.box_iou\n\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # 当iou大于fg_iou_thresh(0.7)时视为正样本\n            bg_iou_thresh,  # 当iou小于bg_iou_thresh(0.3)时视为负样本\n            allow_low_quality_matches=True\n        )\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image, positive_fraction  # 256, 0.5\n        )\n\n        # use during testing\n        self._pre_nms_top_n = pre_nms_top_n\n        self._post_nms_top_n = post_nms_top_n\n        self.nms_thresh = nms_thresh\n        self.score_thresh = score_thresh\n        self.min_size = 1.\n\n    def pre_nms_top_n(self):\n        if self.training:\n            return self._pre_nms_top_n['training']\n        return self._pre_nms_top_n['testing']\n\n    def post_nms_top_n(self):\n        if self.training:\n            return self._post_nms_top_n['training']\n        return 
self._post_nms_top_n['testing']\n\n    def assign_targets_to_anchors(self, anchors, targets):\n        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        计算每个anchors最匹配的gt，并划分为正样本，背景以及废弃的样本\n        Args：\n            anchors: (List[Tensor])\n            targets: (List[Dict[Tensor])\n        Returns:\n            labels: 标记anchors归属类别（1, 0, -1分别对应正样本，背景，废弃的样本）\n                    注意，在RPN中只有前景和背景，所有正样本的类别都是1，0代表背景\n            matched_gt_boxes：与anchors匹配的gt\n        \"\"\"\n        labels = []\n        matched_gt_boxes = []\n        # 遍历每张图像的anchors和targets\n        for anchors_per_image, targets_per_image in zip(anchors, targets):\n            gt_boxes = targets_per_image[\"boxes\"]\n            if gt_boxes.numel() == 0:\n                device = anchors_per_image.device\n                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)\n                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)\n            else:\n                # 计算anchors与真实bbox的iou信息\n                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)\n                # 计算每个anchors与gt匹配iou最大的索引（如果iou<0.3索引置为-1，0.3<iou<0.7索引为-2）\n                matched_idxs = self.proposal_matcher(match_quality_matrix)\n                # get the targets corresponding GT for each proposal\n                # NB: need to clamp the indices because we can have a single\n                # GT in the image, and matched_idxs can be -2, which goes\n                # out of bounds\n                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]\n\n                labels_per_image = matched_idxs >= 0\n                labels_per_image = labels_per_image.to(dtype=torch.float32)\n\n                # background (negative 
examples)\n                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_per_image[bg_indices] = 0.0\n\n                # discard indices that are between thresholds\n                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_per_image[inds_to_discard] = -1.0\n\n            labels.append(labels_per_image)\n            matched_gt_boxes.append(matched_gt_boxes_per_image)\n        return labels, matched_gt_boxes\n\n    def _get_top_n_idx(self, objectness, num_anchors_per_level):\n        # type: (Tensor, List[int]) -> Tensor\n        \"\"\"\n        获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        Args:\n            objectness: Tensor(每张图像的预测目标概率信息 )\n            num_anchors_per_level: List（每个预测特征层上的预测的anchors个数）\n        Returns:\n\n        \"\"\"\n        r = []  # 记录每个预测特征层上预测目标概率前pre_nms_top_n的索引信息\n        offset = 0\n        # 遍历每个预测特征层上的预测目标概率信息\n        for ob in objectness.split(num_anchors_per_level, 1):\n            if torchvision._is_tracing():\n                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())\n            else:\n                num_anchors = ob.shape[1]  # 预测特征层上的预测的anchors个数\n                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)\n\n            # Returns the k largest elements of the given input tensor along a given dimension\n            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)\n            r.append(top_n_idx + offset)\n            offset += num_anchors\n        return torch.cat(r, dim=1)\n\n    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):\n        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        Args:\n            proposals: 预测的bbox坐标\n            objectness: 预测的目标概率\n            
image_shapes: batch中每张图片的size信息\n            num_anchors_per_level: 每个预测特征层上预测anchors的数目\n\n        Returns:\n\n        \"\"\"\n        num_images = proposals.shape[0]\n        device = proposals.device\n\n        # do not backprop throught objectness\n        objectness = objectness.detach()\n        objectness = objectness.reshape(num_images, -1)\n\n        # Returns a tensor of size size filled with fill_value\n        # levels负责记录分隔不同预测特征层上的anchors索引信息\n        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)\n                  for idx, n in enumerate(num_anchors_per_level)]\n        levels = torch.cat(levels, 0)\n\n        # Expand this tensor to the same size as objectness\n        levels = levels.reshape(1, -1).expand_as(objectness)\n\n        # select top_n boxes independently per level before applying nms\n        # 获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)\n\n        image_range = torch.arange(num_images, device=device)\n        batch_idx = image_range[:, None]  # [batch_size, 1]\n\n        # 根据每个预测特征层预测概率排前pre_nms_top_n的anchors索引值获取相应概率信息\n        objectness = objectness[batch_idx, top_n_idx]\n        levels = levels[batch_idx, top_n_idx]\n        # 预测概率排前pre_nms_top_n的anchors索引值获取相应bbox坐标信息\n        proposals = proposals[batch_idx, top_n_idx]\n\n        objectness_prob = torch.sigmoid(objectness)\n\n        final_boxes = []\n        final_scores = []\n        # 遍历每张图像的相关预测信息\n        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):\n            # 调整预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)\n\n            # 返回boxes满足宽，高都大于min_size的索引\n            keep = box_ops.remove_small_boxes(boxes, self.min_size)\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # 移除小概率boxes，参考下面这个链接\n            # https://github.com/pytorch/vision/pull/3205\n     
       keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # non-maximum suppression, independently done per level\n            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            keep = keep[: self.post_nms_top_n()]\n            boxes, scores = boxes[keep], scores[keep]\n\n            final_boxes.append(boxes)\n            final_scores.append(scores)\n        return final_boxes, final_scores\n\n    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):\n        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n        \"\"\"\n        计算RPN损失，包括类别损失（前景与背景），bbox regression损失\n        Arguments:\n            objectness (Tensor)：预测的前景概率\n            pred_bbox_deltas (Tensor)：预测的bbox regression\n            labels (List[Tensor])：真实的标签 1, 0, -1（batch中每一张图片的labels对应List的一个元素中）\n            regression_targets (List[Tensor])：真实的bbox regression\n\n        Returns:\n            objectness_loss (Tensor) : 类别损失\n            box_loss (Tensor)：边界框回归损失\n        \"\"\"\n        # 按照给定的batch_size_per_image, positive_fraction选择正负样本\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        # 将一个batch中的所有正负样本List(Tensor)分别拼接在一起，并获取非零位置的索引\n        # sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)\n        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]\n        # sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)\n        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]\n\n        # 将所有正负样本索引拼接在一起\n        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)\n        objectness = objectness.flatten()\n\n        labels = torch.cat(labels, dim=0)\n        regression_targets = torch.cat(regression_targets, 
dim=0)\n\n        # 计算边界框回归损失\n        box_loss = det_utils.smooth_l1_loss(\n            pred_bbox_deltas[sampled_pos_inds],\n            regression_targets[sampled_pos_inds],\n            beta=1 / 9,\n            size_average=False,\n        ) / (sampled_inds.numel())\n\n        # 计算目标预测概率损失\n        objectness_loss = F.binary_cross_entropy_with_logits(\n            objectness[sampled_inds], labels[sampled_inds]\n        )\n\n        return objectness_loss, box_loss\n\n    def forward(self,\n                images,        # type: ImageList\n                features,      # type: Dict[str, Tensor]\n                targets=None   # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            images (ImageList): images for which we want to compute the predictions\n            features (Dict[Tensor]): features computed from the images that are\n                used for computing the predictions. Each tensor in the list\n                correspond to different feature levels\n            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).\n                If provided, each element in the dict should contain a field `boxes`,\n                with the locations of the ground-truth boxes.\n\n        Returns:\n            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per\n                image.\n            losses (Dict[Tensor]): the losses for the model during training. 
During\n                testing, it is an empty dict.\n        \"\"\"\n        # RPN uses all feature maps that are available\n        # features是所有预测特征层组成的OrderedDict\n        features = list(features.values())\n\n        # 计算每个预测特征层上的预测目标概率和bboxes regression参数\n        # objectness和pred_bbox_deltas都是list\n        objectness, pred_bbox_deltas = self.head(features)\n\n        # 生成一个batch图像的所有anchors信息,list(tensor)元素个数等于batch_size\n        anchors = self.anchor_generator(images, features)\n\n        # batch_size\n        num_images = len(anchors)\n\n        # numel() Returns the total number of elements in the input tensor.\n        # 计算每个预测特征层上的对应的anchors数量\n        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]\n        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]\n\n        # 调整内部tensor格式以及shape\n        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,\n                                                                    pred_bbox_deltas)\n\n        # apply pred_bbox_deltas to anchors to obtain the decoded proposals\n        # note that we detach the deltas because Faster R-CNN do not backprop through\n        # the proposals\n        # 将预测的bbox regression参数应用到anchors上得到最终预测bbox坐标\n        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)\n        proposals = proposals.view(num_images, -1, 4)\n\n        # 筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)\n\n        losses = {}\n        if self.training:\n            assert targets is not None\n            # 计算每个anchors最匹配的gt，并将anchors进行分类，前景，背景以及废弃的anchors\n            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)\n            # 结合anchors以及对应的gt，计算regression参数\n            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)\n            loss_objectness, 
loss_rpn_box_reg = self.compute_loss(\n                objectness, pred_bbox_deltas, labels, regression_targets\n            )\n            losses = {\n                \"loss_objectness\": loss_objectness,\n                \"loss_rpn_box_reg\": loss_rpn_box_reg\n            }\n        return boxes, losses\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/network_files/transform.py",
    "content": "import math\nfrom typing import List, Tuple, Dict, Optional\n\nimport torch\nfrom torch import nn, Tensor\nimport torch.nn.functional as F\nimport torchvision\n\nfrom .image_list import ImageList\n\n\ndef _onnx_paste_mask_in_image(mask, box, im_h, im_w):\n    one = torch.ones(1, dtype=torch.int64)\n    zero = torch.zeros(1, dtype=torch.int64)\n\n    w = box[2] - box[0] + one\n    h = box[3] - box[1] + one\n    w = torch.max(torch.cat((w, one)))\n    h = torch.max(torch.cat((h, one)))\n\n    # Set shape to [batchxCxHxW]\n    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))\n\n    # Resize mask\n    mask = F.interpolate(mask, size=(int(h), int(w)), mode=\"bilinear\", align_corners=False)\n    mask = mask[0][0]\n\n    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))\n    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))\n    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))\n    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))\n\n    unpaded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]\n\n    # TODO : replace below with a dynamic padding when support is added in ONNX\n\n    # pad y\n    zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))\n    zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))\n    concat_0 = torch.cat((zeros_y0, unpaded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]\n    # pad x\n    zeros_x0 = torch.zeros(concat_0.size(0), x_0)\n    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)\n    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]\n    return im_mask\n\n\n@torch.jit._script_if_tracing\ndef _onnx_paste_mask_in_image_loop(masks, boxes, im_h, im_w):\n    res_append = torch.zeros(0, im_h, im_w)\n    for i in range(masks.size(0)):\n        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)\n        mask_res = mask_res.unsqueeze(0)\n        res_append = 
torch.cat((res_append, mask_res))\n\n    return res_append\n\n\n@torch.jit.unused\ndef _get_shape_onnx(image: Tensor) -> Tensor:\n    from torch.onnx import operators\n\n    return operators.shape_as_tensor(image)[-2:]\n\n\n@torch.jit.unused\ndef _fake_cast_onnx(v: Tensor) -> float:\n    # ONNX requires a tensor but here we fake its type for JIT.\n    return v\n\n\ndef _resize_image_and_masks(image: Tensor,\n                            self_min_size: float,\n                            self_max_size: float,\n                            target: Optional[Dict[str, Tensor]] = None,\n                            fixed_size: Optional[Tuple[int, int]] = None\n                            ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:\n\n    if torchvision._is_tracing():\n        im_shape = _get_shape_onnx(image)\n    else:\n        im_shape = torch.tensor(image.shape[-2:])\n\n    size: Optional[List[int]] = None\n    scale_factor: Optional[float] = None\n    recompute_scale_factor: Optional[bool] = None\n    if fixed_size is not None:\n        size = [fixed_size[1], fixed_size[0]]\n    else:\n        min_size = torch.min(im_shape).to(dtype=torch.float32)  # 获取高宽中的最小值\n        max_size = torch.max(im_shape).to(dtype=torch.float32)  # 获取高宽中的最大值\n        scale = torch.min(self_min_size / min_size, self_max_size / max_size)  # 计算缩放比例\n\n        if torchvision._is_tracing():\n            scale_factor = _fake_cast_onnx(scale)\n        else:\n            scale_factor = scale.item()\n        recompute_scale_factor = True\n\n    # interpolate利用插值的方法缩放图片\n    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]\n    # bilinear只支持4D Tensor\n    image = torch.nn.functional.interpolate(\n        image[None],\n        size=size,\n        scale_factor=scale_factor,\n        mode=\"bilinear\",\n        recompute_scale_factor=recompute_scale_factor,\n        align_corners=False)[0]\n\n    if target is None:\n        return image, target\n\n    if \"masks\" in target:\n        mask = 
target[\"masks\"]\n        mask = torch.nn.functional.interpolate(\n            mask[:, None].float(), size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor\n        )[:, 0].byte()  # self.byte() is equivalent to self.to(torch.uint8).\n        target[\"masks\"] = mask\n\n    return image, target\n\n\ndef _onnx_expand_boxes(boxes, scale):\n    # type: (Tensor, float) -> Tensor\n    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5\n    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5\n    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5\n    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5\n\n    w_half = w_half.to(dtype=torch.float32) * scale\n    h_half = h_half.to(dtype=torch.float32) * scale\n\n    boxes_exp0 = x_c - w_half\n    boxes_exp1 = y_c - h_half\n    boxes_exp2 = x_c + w_half\n    boxes_exp3 = y_c + h_half\n    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)\n    return boxes_exp\n\n\n# the next two functions should be merged inside Masker\n# but are kept here for the moment while we need them\n# temporarily for paste_mask_in_image\ndef expand_boxes(boxes, scale):\n    # type: (Tensor, float) -> Tensor\n    if torchvision._is_tracing():\n        return _onnx_expand_boxes(boxes, scale)\n    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5\n    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5\n    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5\n    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5\n\n    w_half *= scale\n    h_half *= scale\n\n    boxes_exp = torch.zeros_like(boxes)\n    boxes_exp[:, 0] = x_c - w_half\n    boxes_exp[:, 2] = x_c + w_half\n    boxes_exp[:, 1] = y_c - h_half\n    boxes_exp[:, 3] = y_c + h_half\n    return boxes_exp\n\n\n@torch.jit.unused\ndef expand_masks_tracing_scale(M, padding):\n    # type: (int, int) -> float\n    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)\n\n\ndef expand_masks(mask, padding):\n    # type: (Tensor, int) -> Tuple[Tensor, float]\n    M = mask.shape[-1]\n    
if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why\n        scale = expand_masks_tracing_scale(M, padding)\n    else:\n        scale = float(M + 2 * padding) / M\n    padded_mask = F.pad(mask, (padding,) * 4)\n    return padded_mask, scale\n\n\ndef paste_mask_in_image(mask, box, im_h, im_w):\n    # type: (Tensor, Tensor, int, int) -> Tensor\n\n    # refer to: https://github.com/pytorch/vision/issues/5845\n    TO_REMOVE = 1\n    w = int(box[2] - box[0] + TO_REMOVE)\n    h = int(box[3] - box[1] + TO_REMOVE)\n    w = max(w, 1)\n    h = max(h, 1)\n\n    # Set shape to [batch, C, H, W]\n    # 因为后续的bilinear操作只支持4-D的Tensor\n    mask = mask.expand((1, 1, -1, -1))  # -1 means not changing the size of that dimension\n\n    # Resize mask\n    mask = F.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False)\n    mask = mask[0][0]  # [batch, C, H, W] -> [H, W]\n\n    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)\n    # 填入原图的目标区域(防止越界)\n    x_0 = max(box[0], 0)\n    x_1 = min(box[2] + 1, im_w)\n    y_0 = max(box[1], 0)\n    y_1 = min(box[3] + 1, im_h)\n\n    # 将resize后的mask填入对应目标区域\n    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])]\n    return im_mask\n\n\ndef paste_masks_in_image(masks, boxes, img_shape, padding=1):\n    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor\n\n    # pytorch官方说对mask进行expand能够略微提升mAP\n    # refer to: https://github.com/pytorch/vision/issues/5845\n    masks, scale = expand_masks(masks, padding=padding)\n    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)\n    im_h, im_w = img_shape\n\n    if torchvision._is_tracing():\n        return _onnx_paste_mask_in_image_loop(\n            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)\n        )[:, None]\n    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]\n    if len(res) > 0:\n        ret 
= torch.stack(res, dim=0)[:, None]  # [num_obj, 1, H, W]\n    else:\n        ret = masks.new_empty((0, 1, im_h, im_w))\n    return ret\n\n\nclass GeneralizedRCNNTransform(nn.Module):\n    \"\"\"\n    Performs input / target transformation before feeding the data to a GeneralizedRCNN\n    model.\n\n    The transformations it performs are:\n        - input normalization (mean subtraction and std division)\n        - input / target resizing to match min_size / max_size\n\n    It returns an ImageList for the inputs, and a List[Dict[Tensor]] for the targets\n    \"\"\"\n\n    def __init__(self,\n                 min_size: int,\n                 max_size: int,\n                 image_mean: List[float],\n                 image_std: List[float],\n                 size_divisible: int = 32,\n                 fixed_size: Optional[Tuple[int, int]] = None):\n        super().__init__()\n        if not isinstance(min_size, (list, tuple)):\n            min_size = (min_size,)\n        self.min_size = min_size      # 指定图像的最小边长范围\n        self.max_size = max_size      # 指定图像的最大边长范围\n        self.image_mean = image_mean  # 指定图像在标准化处理中的均值\n        self.image_std = image_std    # 指定图像在标准化处理中的标准差\n        self.size_divisible = size_divisible\n        self.fixed_size = fixed_size\n\n    def normalize(self, image):\n        \"\"\"标准化处理\"\"\"\n        dtype, device = image.dtype, image.device\n        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)\n        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)\n        # [:, None, None]: shape [3] -> [3, 1, 1]\n        return (image - mean[:, None, None]) / std[:, None, None]\n\n    def torch_choice(self, k):\n        # type: (List[int]) -> int\n        \"\"\"\n        Implements `random.choice` via torch ops so it can be compiled with\n        TorchScript. 
Remove if https://github.com/pytorch/pytorch/issues/25803\n        is fixed.\n        \"\"\"\n        index = int(torch.empty(1).uniform_(0., float(len(k))).item())\n        return k[index]\n\n    def resize(self, image, target):\n        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]\n        \"\"\"\n        将图片缩放到指定的大小范围内，并对应缩放bboxes信息\n        Args:\n            image: 输入的图片\n            target: 输入图片的相关信息（包括bboxes信息）\n\n        Returns:\n            image: 缩放后的图片\n            target: 缩放bboxes后的图片相关信息\n        \"\"\"\n        # image shape is [channel, height, width]\n        h, w = image.shape[-2:]\n\n        if self.training:\n            size = float(self.torch_choice(self.min_size))  # 指定输入图片的最小边长,注意是self.min_size不是min_size\n        else:\n            # FIXME assume for now that testing uses the largest scale\n            size = float(self.min_size[-1])    # 指定输入图片的最小边长,注意是self.min_size不是min_size\n\n        image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)\n\n        if target is None:\n            return image, target\n\n        bbox = target[\"boxes\"]\n        # 根据图像的缩放比例来缩放bbox\n        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])\n        target[\"boxes\"] = bbox\n\n        return image, target\n\n    # _onnx_batch_images() is an implementation of\n    # batch_images() that is supported by ONNX tracing.\n    @torch.jit.unused\n    def _onnx_batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        max_size = []\n        for i in range(images[0].dim()):\n            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)\n            max_size.append(max_size_i)\n        stride = size_divisible\n        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)\n        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / 
stride) * stride).to(torch.int64)\n        max_size = tuple(max_size)\n\n        # work around for\n        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n        # which is not yet supported in onnx\n        padded_imgs = []\n        for img in images:\n            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]\n            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])\n            padded_imgs.append(padded_img)\n\n        return torch.stack(padded_imgs)\n\n    def max_by_axis(self, the_list):\n        # type: (List[List[int]]) -> List[int]\n        maxes = the_list[0]\n        for sublist in the_list[1:]:\n            for index, item in enumerate(sublist):\n                maxes[index] = max(maxes[index], item)\n        return maxes\n\n    def batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        \"\"\"\n        将一批图像打包成一个batch返回（注意batch中每个tensor的shape是相同的）\n        Args:\n            images: 输入的一批图片\n            size_divisible: 将图像高和宽调整到该数的整数倍\n\n        Returns:\n            batched_imgs: 打包成一个batch后的tensor数据\n        \"\"\"\n\n        if torchvision._is_tracing():\n            # batch_images() does not export well to ONNX\n            # call _onnx_batch_images() instead\n            return self._onnx_batch_images(images, size_divisible)\n\n        # 分别计算一个batch中所有图片中的最大channel, height, width\n        max_size = self.max_by_axis([list(img.shape) for img in images])\n\n        stride = float(size_divisible)\n        # max_size = list(max_size)\n        # 将height向上调整到stride的整数倍\n        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)\n        # 将width向上调整到stride的整数倍\n        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)\n\n        # [batch, channel, height, width]\n        batch_shape = [len(images)] + max_size\n\n        # 创建shape为batch_shape且值全部为0的tensor\n        batched_imgs = 
images[0].new_full(batch_shape, 0)\n        for img, pad_img in zip(images, batched_imgs):\n            # 将输入images中的每张图片复制到新的batched_imgs的每张图片中，对齐左上角，保证bboxes的坐标不变\n            # 这样保证输入到网络中一个batch的每张图片的shape相同\n            # copy_: Copies the elements from src into self tensor and returns self\n            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n\n        return batched_imgs\n\n    def postprocess(self,\n                    result,                # type: List[Dict[str, Tensor]]\n                    image_shapes,          # type: List[Tuple[int, int]]\n                    original_image_sizes   # type: List[Tuple[int, int]]\n                    ):\n        # type: (...) -> List[Dict[str, Tensor]]\n        \"\"\"\n        对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        Args:\n            result: list(dict), 网络的预测结果, len(result) == batch_size\n            image_shapes: list(torch.Size), 图像预处理缩放后的尺寸, len(image_shapes) == batch_size\n            original_image_sizes: list(torch.Size), 图像的原始尺寸, len(original_image_sizes) == batch_size\n\n        Returns:\n\n        \"\"\"\n        if self.training:\n            return result\n\n        # 遍历每张图片的预测信息，将boxes信息还原回原尺度\n        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):\n            boxes = pred[\"boxes\"]\n            boxes = resize_boxes(boxes, im_s, o_im_s)  # 将bboxes缩放回原图像尺度上\n            result[i][\"boxes\"] = boxes\n            if \"masks\" in pred:\n                masks = pred[\"masks\"]\n                # 将mask映射回原图尺度\n                masks = paste_masks_in_image(masks, boxes, o_im_s)\n                result[i][\"masks\"] = masks\n\n        return result\n\n    def __repr__(self):\n        \"\"\"自定义输出实例化对象的信息，可通过print打印实例信息\"\"\"\n        format_string = self.__class__.__name__ + '('\n        _indent = '\\n    '\n        format_string += \"{0}Normalize(mean={1}, std={2})\".format(_indent, self.image_mean, self.image_std)\n        format_string += 
\"{0}Resize(min_size={1}, max_size={2}, mode='bilinear')\".format(_indent, self.min_size,\n                                                                                         self.max_size)\n        format_string += '\\n)'\n        return format_string\n\n    def forward(self,\n                images,       # type: List[Tensor]\n                targets=None  # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]\n        images = [img for img in images]\n        for i in range(len(images)):\n            image = images[i]\n            target_index = targets[i] if targets is not None else None\n\n            if image.dim() != 3:\n                raise ValueError(\"images is expected to be a list of 3d tensors \"\n                                 \"of shape [C, H, W], got {}\".format(image.shape))\n            image = self.normalize(image)  # 对图像进行标准化处理\n            image, target_index = self.resize(image, target_index)  # 对图像和对应的bboxes缩放到指定范围\n            images[i] = image\n            if targets is not None and target_index is not None:\n                targets[i] = target_index\n\n        # 记录resize后的图像尺寸\n        image_sizes = [img.shape[-2:] for img in images]\n        images = self.batch_images(images, self.size_divisible)  # 将images打包成一个batch\n        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])\n\n        for image_size in image_sizes:\n            assert len(image_size) == 2\n            image_sizes_list.append((image_size[0], image_size[1]))\n\n        image_list = ImageList(images, image_sizes_list)\n        return image_list, targets\n\n\ndef resize_boxes(boxes, original_size, new_size):\n    # type: (Tensor, List[int], List[int]) -> Tensor\n    \"\"\"\n    将boxes参数根据图像的缩放情况进行相应缩放\n\n    Arguments:\n        original_size: 图像缩放前的尺寸\n        new_size: 图像缩放后的尺寸\n    \"\"\"\n    ratios = [\n        torch.tensor(s, dtype=torch.float32, 
device=boxes.device) /\n        torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)\n        for s, s_orig in zip(new_size, original_size)\n    ]\n    ratios_height, ratios_width = ratios\n    # Removes a tensor dimension, boxes [minibatch, 4]\n    # Returns a tuple of all slices along a given dimension, already without it.\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    xmin = xmin * ratios_width\n    xmax = xmax * ratios_width\n    ymin = ymin * ratios_height\n    ymax = ymax * ratios_height\n    return torch.stack((xmin, ymin, xmax, ymax), dim=1)\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/pascal_voc_indices.json",
    "content": "{\n    \"1\": \"aeroplane\",\n    \"2\": \"bicycle\",\n    \"3\": \"bird\",\n    \"4\": \"boat\",\n    \"5\": \"bottle\",\n    \"6\": \"bus\",\n    \"7\": \"car\",\n    \"8\": \"cat\",\n    \"9\": \"chair\",\n    \"10\": \"cow\",\n    \"11\": \"diningtable\",\n    \"12\": \"dog\",\n    \"13\": \"horse\",\n    \"14\": \"motorbike\",\n    \"15\": \"person\",\n    \"16\": \"pottedplant\",\n    \"17\": \"sheep\",\n    \"18\": \"sofa\",\n    \"19\": \"train\",\n    \"20\": \"tvmonitor\"\n}"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"step\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport numpy as np\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nimport torch\nfrom torchvision import transforms\n\nfrom network_files import MaskRCNN\nfrom backbone import resnet50_fpn_backbone\nfrom draw_box_utils import draw_objs\n\n\ndef create_model(num_classes, box_thresh=0.5):\n    backbone = resnet50_fpn_backbone()\n    model = MaskRCNN(backbone,\n                     num_classes=num_classes,\n                     rpn_score_thresh=box_thresh,\n                     box_score_thresh=box_thresh)\n\n    return model\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    num_classes = 90  # 不包含背景\n    box_thresh = 0.5\n    weights_path = \"./save_weights/model_25.pth\"\n    img_path = \"./test.jpg\"\n    label_json_path = './coco91_indices.json'\n\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = create_model(num_classes=num_classes + 1, box_thresh=box_thresh)\n\n    # load train weights\n    assert os.path.exists(weights_path), \"{} file dose not exist.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # read class_indict\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as json_file:\n        category_index = json.load(json_file)\n\n    # load image\n    assert os.path.exists(img_path), f\"{img_path} does not exits.\"\n    original_img = Image.open(img_path).convert('RGB')\n\n    # from pil image to tensor, do not normalize image\n    data_transform = 
transforms.Compose([transforms.ToTensor()])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        predictions = model(img.to(device))[0]\n        t_end = time_synchronized()\n        print(\"inference+NMS time: {}\".format(t_end - t_start))\n\n        predict_boxes = predictions[\"boxes\"].to(\"cpu\").numpy()\n        predict_classes = predictions[\"labels\"].to(\"cpu\").numpy()\n        predict_scores = predictions[\"scores\"].to(\"cpu\").numpy()\n        predict_mask = predictions[\"masks\"].to(\"cpu\").numpy()\n        predict_mask = np.squeeze(predict_mask, axis=1)  # [batch, 1, h, w] -> [batch, h, w]\n\n        if len(predict_boxes) == 0:\n            print(\"没有检测到任何目标!\")\n            return\n\n        plot_img = draw_objs(original_img,\n                             boxes=predict_boxes,\n                             classes=predict_classes,\n                             scores=predict_scores,\n                             masks=predict_mask,\n                             category_index=category_index,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/requirements.txt",
    "content": "lxml\nmatplotlib\nnumpy\ntqdm\npycocotools\nPillow\ntorch==1.13.1\ntorchvision==0.11.1\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/seg_results20220406-141544.txt",
    "content": "epoch:0 0.172  0.321  0.167  0.065  0.195  0.250  0.188  0.307  0.324  0.147  0.366  0.440  1.3826  0.08\nepoch:1 0.223  0.395  0.225  0.092  0.249  0.322  0.222  0.354  0.372  0.186  0.413  0.499  1.0356  0.08\nepoch:2 0.235  0.408  0.241  0.100  0.258  0.350  0.230  0.372  0.392  0.204  0.429  0.517  0.9718  0.08\nepoch:3 0.246  0.426  0.252  0.103  0.267  0.357  0.241  0.386  0.408  0.225  0.448  0.521  0.9363  0.08\nepoch:4 0.250  0.424  0.257  0.106  0.272  0.367  0.242  0.381  0.400  0.210  0.438  0.530  0.9145  0.08\nepoch:5 0.255  0.434  0.262  0.109  0.279  0.375  0.242  0.379  0.398  0.209  0.433  0.534  0.8982  0.08\nepoch:6 0.270  0.456  0.283  0.120  0.293  0.392  0.254  0.403  0.421  0.229  0.462  0.551  0.8859  0.08\nepoch:7 0.269  0.455  0.280  0.118  0.296  0.388  0.257  0.402  0.421  0.228  0.454  0.564  0.8771  0.08\nepoch:8 0.276  0.465  0.290  0.120  0.301  0.398  0.255  0.401  0.418  0.227  0.461  0.553  0.8685  0.08\nepoch:9 0.271  0.458  0.282  0.113  0.297  0.404  0.253  0.398  0.417  0.211  0.460  0.570  0.8612  0.08\nepoch:10 0.277  0.463  0.289  0.119  0.299  0.410  0.258  0.405  0.425  0.221  0.466  0.558  0.8547  0.08\nepoch:11 0.276  0.463  0.287  0.122  0.304  0.405  0.259  0.406  0.425  0.236  0.466  0.559  0.8498  0.08\nepoch:12 0.276  0.464  0.288  0.127  0.294  0.409  0.257  0.406  0.425  0.236  0.459  0.563  0.8461  0.08\nepoch:13 0.284  0.477  0.296  0.124  0.311  0.412  0.262  0.407  0.429  0.229  0.474  0.555  0.8409  0.08\nepoch:14 0.277  0.464  0.292  0.121  0.304  0.397  0.257  0.410  0.431  0.238  0.473  0.565  0.8355  0.08\nepoch:15 0.282  0.474  0.296  0.121  0.308  0.413  0.264  0.411  0.432  0.231  0.473  0.575  0.833  0.08\nepoch:16 0.336  0.549  0.356  0.149  0.367  0.491  0.288  0.451  0.473  0.269  0.519  0.620  0.7421  0.008\nepoch:17 0.339  0.553  0.360  0.153  0.371  0.496  0.292  0.454  0.475  0.271  0.518  0.624  0.7157  0.008\nepoch:18 0.340  0.553  0.361  0.150  0.371  0.494  0.290  0.453  
0.473  0.269  0.516  0.620  0.7016  0.008\nepoch:19 0.341  0.555  0.363  0.154  0.372  0.500  0.293  0.458  0.478  0.273  0.522  0.630  0.6897  0.008\nepoch:20 0.340  0.554  0.361  0.154  0.370  0.496  0.289  0.450  0.471  0.266  0.514  0.622  0.6802  0.008\nepoch:21 0.338  0.552  0.358  0.151  0.367  0.500  0.289  0.447  0.467  0.262  0.507  0.622  0.6708  0.008\nepoch:22 0.340  0.553  0.360  0.151  0.370  0.500  0.290  0.450  0.470  0.267  0.513  0.623  0.6497  0.0008\nepoch:23 0.340  0.552  0.361  0.151  0.369  0.500  0.290  0.449  0.468  0.266  0.509  0.619  0.6447  0.0008\nepoch:24 0.339  0.552  0.359  0.150  0.369  0.500  0.290  0.448  0.468  0.264  0.510  0.619  0.6421  0.0008\nepoch:25 0.338  0.551  0.359  0.152  0.367  0.500  0.289  0.448  0.467  0.264  0.509  0.618  0.6398  0.0008\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train.py",
    "content": "import os\nimport datetime\n\nimport torch\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nimport transforms\nfrom network_files import MaskRCNN\nfrom backbone import resnet50_fpn_backbone\nfrom my_dataset_coco import CocoDetection\nfrom my_dataset_voc import VOCInstances\nfrom train_utils import train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\n\n\ndef create_model(num_classes, load_pretrain_weights=True):\n    # 如果GPU显存很小，batch_size不能设置很大，建议将norm_layer设置成FrozenBatchNorm2d(默认是nn.BatchNorm2d)\n    # FrozenBatchNorm2d的功能与BatchNorm2d类似，但参数无法更新\n    # trainable_layers包括['layer4', 'layer3', 'layer2', 'layer1', 'conv1']， 5代表全部训练\n    # backbone = resnet50_fpn_backbone(norm_layer=FrozenBatchNorm2d,\n    #                                  trainable_layers=3)\n    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth\n    backbone = resnet50_fpn_backbone(pretrain_path=\"resnet50.pth\", trainable_layers=3)\n\n    model = MaskRCNN(backbone, num_classes=num_classes)\n\n    if load_pretrain_weights:\n        # coco weights url: \"https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth\"\n        weights_dict = torch.load(\"./maskrcnn_resnet50_fpn_coco.pth\", map_location=\"cpu\")\n        for k in list(weights_dict.keys()):\n            if (\"box_predictor\" in k) or (\"mask_fcn_logits\" in k):\n                del weights_dict[k]\n\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    now = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    det_results_file = f\"det_results{now}.txt\"\n    seg_results_file = f\"seg_results{now}.txt\"\n\n    data_transform = {\n        \"train\": 
transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    data_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> instances_train2017.json\n    train_dataset = CocoDetection(data_root, \"train\", data_transform[\"train\"])\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    # train_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"train.txt\", transforms=data_transform[\"train\"])\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        
shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # coco2017 -> annotations -> instances_val2017.json\n    val_dataset = CocoDetection(data_root, \"val\", data_transform[\"val\"])\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    # val_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"val.txt\", transforms=data_transform[\"val\"])\n    val_data_loader = torch.utils.data.DataLoader(val_dataset,\n                                                  batch_size=1,\n                                                  shuffle=False,\n                                                  pin_memory=True,\n                                                  num_workers=nw,\n                                                  collate_fn=train_dataset.collate_fn)\n\n    # create model num_classes equal background + classes\n    model = create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain)\n    model.to(device)\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,\n                                                        milestones=args.lr_steps,\n                                                        gamma=args.lr_gamma)\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module 
to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 50 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device, epoch, print_freq=50,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        det_info, seg_info = utils.evaluate(model, val_data_loader, device=device)\n\n        # write detection into txt\n        with open(det_results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in det_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        # write seg into txt\n        with open(seg_results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in seg_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(det_info[1])  # pascal mAP\n\n        # save weights\n     
   save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/model_{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=26, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.004, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练的batch size(如果内存/GPU显存充裕，建议设置更大)\n    parser.add_argument('--batch_size', default=2, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    parser.add_argument(\"--pretrain\", type=bool, default=True, help=\"load COCO pretrain weights.\")\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nimport transforms\nfrom my_dataset_coco import CocoDetection\nfrom my_dataset_voc import VOCInstances\nfrom backbone import resnet50_fpn_backbone\nfrom network_files import MaskRCNN\nimport train_utils.train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir\n\n\ndef create_model(num_classes, load_pretrain_weights=True):\n    # 如果GPU显存很小，batch_size不能设置很大，建议将norm_layer设置成FrozenBatchNorm2d(默认是nn.BatchNorm2d)\n    # FrozenBatchNorm2d的功能与BatchNorm2d类似，但参数无法更新\n    # trainable_layers包括['layer4', 'layer3', 'layer2', 'layer1', 'conv1']， 5代表全部训练\n    # backbone = resnet50_fpn_backbone(norm_layer=FrozenBatchNorm2d,\n    #                                  trainable_layers=3)\n    # resnet50 imagenet weights url: https://download.pytorch.org/models/resnet50-0676ba61.pth\n    backbone = resnet50_fpn_backbone(pretrain_path=\"resnet50.pth\", trainable_layers=3)\n    model = MaskRCNN(backbone, num_classes=num_classes)\n\n    if load_pretrain_weights:\n        # coco weights url: \"https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth\"\n        weights_dict = torch.load(\"./maskrcnn_resnet50_fpn_coco.pth\", map_location=\"cpu\")\n        for k in list(weights_dict.keys()):\n            if (\"box_predictor\" in k) or (\"mask_fcn_logits\" in k):\n                del weights_dict[k]\n\n        print(model.load_state_dict(weights_dict, strict=False))\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存coco_info的文件\n    now = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    det_results_file = f\"det_results{now}.txt\"\n    seg_results_file = f\"seg_results{now}.txt\"\n\n    # Data loading code\n    print(\"Loading data\")\n\n    data_transform = {\n    
    \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    COCO_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> instances_train2017.json\n    train_dataset = CocoDetection(COCO_root, \"train\", data_transform[\"train\"])\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    # train_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"train.txt\")\n\n    # load validation data set\n    # coco2017 -> annotations -> instances_val2017.json\n    val_dataset = CocoDetection(COCO_root, \"val\", data_transform[\"val\"])\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    # val_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    if args.aspect_ratio_group_factor >= 0:\n        # 统计所有图像比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n    else:\n        train_batch_sampler = torch.utils.data.BatchSampler(\n            train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    data_loader_test = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n  
      collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + classes\n    model = create_model(num_classes=args.num_classes + 1, load_pretrain_weights=args.pretrain)\n    model.to(device)\n\n    if args.distributed and args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(\n        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        utils.evaluate(model, data_loader_test, device=device)\n        return\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    print(\"Start 
training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,\n                                              device, epoch, args.print_freq,\n                                              warmup=True, scaler=scaler)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        det_info, seg_info = utils.evaluate(model, data_loader_test, device=device)\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            train_loss.append(mean_loss.item())\n            learning_rate.append(lr)\n            val_map.append(det_info[1])  # pascal mAP\n\n            # write into txt\n            with open(det_results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in det_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n            with open(seg_results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in seg_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n        if args.output_dir:\n            # 只在主进程上执行保存权重操作\n            save_files = {'model': model_without_ddp.state_dict(),\n                          'optimizer': optimizer.state_dict(),\n                          'lr_scheduler': lr_scheduler.state_dict(),\n                          'args': args,\n                          'epoch': epoch}\n            if args.amp:\n                save_files[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_files,\n                           
os.path.join(args.output_dir, f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(coco2017)\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=26, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 学习率，这个需要根据gpu的数量以及batch_size进行设置0.02 / bs * num_GPU\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, 
type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.StepLR的参数\n    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    parser.add_argument('--test-only', action=\"store_true\", help=\"test only\")\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    parser.add_argument(\"--sync-bn\", dest=\"sync_bn\", help=\"Use sync batch norm\", type=bool, default=False)\n    parser.add_argument(\"--pretrain\", type=bool, default=True, help=\"load COCO pretrain weights.\")\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        
mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/__init__.py",
    "content": "from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .coco_eval import EvalCOCOMetric\nfrom .coco_utils import coco_remove_images_without_annotations, convert_coco_poly_mask, convert_to_coco_api\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/coco_eval.py",
    "content": "import json\nimport copy\n\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\nimport pycocotools.mask as mask_util\nfrom .distributed_utils import all_gather, is_main_process\n\n\ndef merge(img_ids, eval_results):\n    \"\"\"将多个进程之间的数据汇总在一起\"\"\"\n    all_img_ids = all_gather(img_ids)\n    all_eval_results = all_gather(eval_results)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_results = []\n    for p in all_eval_results:\n        merged_eval_results.extend(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n\n    # keep only unique (and in sorted order) images\n    # 去除重复的图片索引，多GPU训练时为了保证每个进程的训练图片数量相同，可能将一张图片分配给多个进程\n    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)\n    merged_eval_results = [merged_eval_results[i] for i in idx]\n\n    return list(merged_img_ids), merged_eval_results\n\n\nclass EvalCOCOMetric:\n    def __init__(self,\n                 coco: COCO = None,\n                 iou_type: str = None,\n                 results_file_name: str = \"predict_results.json\",\n                 classes_mapping: dict = None):\n        self.coco = copy.deepcopy(coco)\n        self.img_ids = []  # 记录每个进程处理图片的ids\n        self.results = []\n        self.aggregation_results = None\n        self.classes_mapping = classes_mapping\n        self.coco_evaluator = None\n        assert iou_type in [\"bbox\", \"segm\", \"keypoints\"]\n        self.iou_type = iou_type\n        self.results_file_name = results_file_name\n\n    def prepare_for_coco_detection(self, targets, outputs):\n        \"\"\"将预测的结果转换成COCOeval指定的格式，针对目标检测任务\"\"\"\n        # 遍历每张图像的预测结果\n        for target, output in zip(targets, outputs):\n            if len(output) == 0:\n                continue\n\n            img_id = int(target[\"image_id\"])\n            if img_id in self.img_ids:\n                # 防止出现重复的数据\n                continue\n            
self.img_ids.append(img_id)\n            per_image_boxes = output[\"boxes\"]\n            # 对于coco_eval, 需要的每个box的数据格式为[x_min, y_min, w, h]\n            # 而我们预测的box格式是[x_min, y_min, x_max, y_max]，所以需要转下格式\n            per_image_boxes[:, 2:] -= per_image_boxes[:, :2]\n            per_image_classes = output[\"labels\"].tolist()\n            per_image_scores = output[\"scores\"].tolist()\n\n            res_list = []\n            # 遍历每个目标的信息\n            for object_score, object_class, object_box in zip(\n                    per_image_scores, per_image_classes, per_image_boxes):\n                object_score = float(object_score)\n                class_idx = int(object_class)\n                if self.classes_mapping is not None:\n                    class_idx = int(self.classes_mapping[str(class_idx)])\n                # We recommend rounding coordinates to the nearest tenth of a pixel\n                # to reduce resulting JSON file size.\n                object_box = [round(b, 2) for b in object_box.tolist()]\n\n                res = {\"image_id\": img_id,\n                       \"category_id\": class_idx,\n                       \"bbox\": object_box,\n                       \"score\": round(object_score, 3)}\n                res_list.append(res)\n            self.results.append(res_list)\n\n    def prepare_for_coco_segmentation(self, targets, outputs):\n        \"\"\"将预测的结果转换成COCOeval指定的格式，针对实例分割任务\"\"\"\n        # 遍历每张图像的预测结果\n        for target, output in zip(targets, outputs):\n            if len(output) == 0:\n                continue\n\n            img_id = int(target[\"image_id\"])\n            if img_id in self.img_ids:\n                # 防止出现重复的数据\n                continue\n\n            self.img_ids.append(img_id)\n            per_image_masks = output[\"masks\"]\n            per_image_classes = output[\"labels\"].tolist()\n            per_image_scores = output[\"scores\"].tolist()\n\n            masks = per_image_masks > 0.5\n\n            res_list = []\n  
          # 遍历每个目标的信息\n            for mask, label, score in zip(masks, per_image_classes, per_image_scores):\n                rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                rle[\"counts\"] = rle[\"counts\"].decode(\"utf-8\")\n\n                class_idx = int(label)\n                if self.classes_mapping is not None:\n                    class_idx = int(self.classes_mapping[str(class_idx)])\n\n                res = {\"image_id\": img_id,\n                       \"category_id\": class_idx,\n                       \"segmentation\": rle,\n                       \"score\": round(score, 3)}\n                res_list.append(res)\n            self.results.append(res_list)\n\n    def update(self, targets, outputs):\n        if self.iou_type == \"bbox\":\n            self.prepare_for_coco_detection(targets, outputs)\n        elif self.iou_type == \"segm\":\n            self.prepare_for_coco_segmentation(targets, outputs)\n        else:\n            raise KeyError(f\"not support iou_type: {self.iou_type}\")\n\n    def synchronize_results(self):\n        # 同步所有进程中的数据\n        eval_ids, eval_results = merge(self.img_ids, self.results)\n        self.aggregation_results = {\"img_ids\": eval_ids, \"results\": eval_results}\n\n        # 主进程上保存即可\n        if is_main_process():\n            results = []\n            [results.extend(i) for i in eval_results]\n            # write predict results into json file\n            json_str = json.dumps(results, indent=4)\n            with open(self.results_file_name, 'w') as json_file:\n                json_file.write(json_str)\n\n    def evaluate(self):\n        # 只在主进程上评估即可\n        if is_main_process():\n            # accumulate predictions from all images\n            coco_true = self.coco\n            coco_pre = coco_true.loadRes(self.results_file_name)\n\n            self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)\n\n            
self.coco_evaluator.evaluate()\n            self.coco_evaluator.accumulate()\n            print(f\"IoU metric: {self.iou_type}\")\n            self.coco_evaluator.summarize()\n\n            coco_info = self.coco_evaluator.stats.tolist()  # numpy to list\n            return coco_info\n        else:\n            return None\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/coco_utils.py",
    "content": "import torch\nimport torch.utils.data\nfrom pycocotools import mask as coco_mask\nfrom pycocotools.coco import COCO\n\n\ndef coco_remove_images_without_annotations(dataset, ids):\n    \"\"\"\n    删除coco数据集中没有目标，或者目标面积非常小的数据\n    refer to:\n    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py\n    :param dataset:\n    :param cat_list:\n    :return:\n    \"\"\"\n    def _has_only_empty_bbox(anno):\n        return all(any(o <= 1 for o in obj[\"bbox\"][2:]) for obj in anno)\n\n    def _has_valid_annotation(anno):\n        # if it's empty, there is no annotation\n        if len(anno) == 0:\n            return False\n        # if all boxes have close to zero area, there is no annotation\n        if _has_only_empty_bbox(anno):\n            return False\n\n        return True\n\n    valid_ids = []\n    for ds_idx, img_id in enumerate(ids):\n        ann_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=None)\n        anno = dataset.loadAnns(ann_ids)\n\n        if _has_valid_annotation(anno):\n            valid_ids.append(img_id)\n\n    return valid_ids\n\n\ndef convert_coco_poly_mask(segmentations, height, width):\n    masks = []\n    for polygons in segmentations:\n        rles = coco_mask.frPyObjects(polygons, height, width)\n        mask = coco_mask.decode(rles)\n        if len(mask.shape) < 3:\n            mask = mask[..., None]\n        mask = torch.as_tensor(mask, dtype=torch.uint8)\n        mask = mask.any(dim=2)\n        masks.append(mask)\n    if masks:\n        masks = torch.stack(masks, dim=0)\n    else:\n        # 如果mask为空，则说明没有目标，直接返回数值为0的mask\n        masks = torch.zeros((0, height, width), dtype=torch.uint8)\n    return masks\n\n\ndef convert_to_coco_api(self):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0, see torchvision issue #1530\n    ann_id = 1\n    dataset = {\"images\": [], \"categories\": [], \"annotations\": []}\n    categories = set()\n    for img_idx in range(len(self)):\n        
targets, h, w = self.get_annotations(img_idx)\n        img_id = targets[\"image_id\"].item()\n        img_dict = {\"id\": img_id,\n                    \"height\": h,\n                    \"width\": w}\n        dataset[\"images\"].append(img_dict)\n        bboxes = targets[\"boxes\"].clone()\n        # convert (x_min, ymin, xmax, ymax) to (xmin, ymin, w, h)\n        bboxes[:, 2:] -= bboxes[:, :2]\n        bboxes = bboxes.tolist()\n        labels = targets[\"labels\"].tolist()\n        areas = targets[\"area\"].tolist()\n        iscrowd = targets[\"iscrowd\"].tolist()\n        if \"masks\" in targets:\n            masks = targets[\"masks\"]\n            # make masks Fortran contiguous for coco_mask\n            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)\n        num_objs = len(bboxes)\n        for i in range(num_objs):\n            ann = {\"image_id\": img_id,\n                   \"bbox\": bboxes[i],\n                   \"category_id\": labels[i],\n                   \"area\": areas[i],\n                   \"iscrowd\": iscrowd[i],\n                   \"id\": ann_id}\n            categories.add(labels[i])\n            if \"masks\" in targets:\n                ann[\"segmentation\"] = coco_mask.encode(masks[i].numpy())\n            dataset[\"annotations\"].append(ann)\n            ann_id += 1\n    dataset[\"categories\"] = [{\"id\": i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    coco_ds.createIndex()\n    return coco_ds\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    收集各个进程中的数据\n    Run all_gather on arbitrary 
picklable data (not necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()  # 进程数\n    if world_size == 1:\n        return [data]\n\n    data_list = [None] * world_size\n    dist.all_gather_object(data_list, data)\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n    
        type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * 
(len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    
builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
在[0.5, 2]区间内按对数等间隔生成2*k+1个分界点(k=0时仅使用1.0一个分界点)\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nimport train_utils.distributed_utils as utils\nfrom .coco_eval import EvalCOCOMetric\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        images = list(image.to(device) for image in images)\n        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            loss_dict = model(images, targets)\n\n            losses = sum(loss for loss in loss_dict.values())\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict(loss_dict)\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n\n        loss_value = losses_reduced.item()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(loss_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(losses).backward()\n            scaler.step(optimizer)\n         
   scaler.update()\n        else:\n            losses.backward()\n            optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    det_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type=\"bbox\", results_file_name=\"det_results.json\")\n    seg_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type=\"segm\", results_file_name=\"seg_results.json\")\n    for image, targets in metric_logger.log_every(data_loader, 100, header):\n        image = list(img.to(device) for img in image)\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        outputs = model(image)\n\n        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n        model_time = time.time() - model_time\n\n        det_metric.update(targets, outputs)\n        seg_metric.update(targets, outputs)\n        metric_logger.update(model_time=model_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n\n    # 同步所有进程中的数据\n    det_metric.synchronize_results()\n    seg_metric.synchronize_results()\n\n    if utils.is_main_process():\n        coco_info = det_metric.evaluate()\n        seg_info = seg_metric.evaluate()\n    else:\n        coco_info = None\n        seg_info = None\n\n    return coco_info, seg_info\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/transforms.py",
    "content": "import random\nfrom torchvision.transforms import functional as F\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机水平翻转图像以及bboxes\"\"\"\n    def __init__(self, prob=0.5):\n        self.prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.prob:\n            height, width = image.shape[-2:]\n            image = image.flip(-1)  # 水平翻转图片\n            bbox = target[\"boxes\"]\n            # bbox: xmin, ymin, xmax, ymax\n            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            target[\"boxes\"] = bbox\n            if \"masks\" in target:\n                target[\"masks\"] = target[\"masks\"].flip(-1)\n        return image, target\n"
  },
  {
    "path": "pytorch_object_detection/mask_rcnn/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nfrom tqdm import tqdm\nimport numpy as np\n\nimport transforms\nfrom backbone import resnet50_fpn_backbone\nfrom network_files import MaskRCNN\nfrom my_dataset_coco import CocoDetection\nfrom my_dataset_voc import VOCInstances\nfrom train_utils import EvalCOCOMetric\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n     
       mean_s = -1\n        else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef save_info(coco_evaluator,\n              category_index: dict,\n              save_name: str = \"record_mAP.txt\"):\n    iou_type = coco_evaluator.params.iouType\n    print(f\"IoU metric: {iou_type}\")\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(coco_evaluator)\n\n    # calculate voc info for every classes(IoU=0.5)\n    classes = [v for v in category_index.values() if v != \"N/A\"]\n    voc_map_info_list = []\n    for i in range(len(classes)):\n        stats, _ = summarize(coco_evaluator, catId=i)\n        
voc_map_info_list.append(\" {:15}: {}\".format(classes[i], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(save_name, \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    # read class_indict\n    label_json_path = parser_data.label_json_path\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        category_index = json.load(f)\n\n    data_root = parser_data.data_path\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = CocoDetection(data_root, \"val\", data_transform[\"val\"])\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    # val_dataset = VOCInstances(data_root, year=\"2012\", txt_name=\"val.txt\", transforms=data_transform[\"val\"])\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     pin_memory=True,\n                                                     num_workers=nw,\n                                                     
collate_fn=val_dataset.collate_fn)\n\n    # create model\n    backbone = resnet50_fpn_backbone()\n    model = MaskRCNN(backbone, num_classes=args.num_classes + 1)\n\n    # 载入你自己训练好的模型权重\n    weights_path = parser_data.weights_path\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    model.load_state_dict(torch.load(weights_path, map_location='cpu')['model'])\n    # print(model)\n\n    model.to(device)\n\n    # evaluate on the val dataset\n    cpu_device = torch.device(\"cpu\")\n\n    det_metric = EvalCOCOMetric(val_dataset.coco, \"bbox\", \"det_results.json\")\n    seg_metric = EvalCOCOMetric(val_dataset.coco, \"segm\", \"seg_results.json\")\n    model.eval()\n    with torch.no_grad():\n        for image, targets in tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            image = list(img.to(device) for img in image)\n\n            # inference\n            outputs = model(image)\n\n            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n            det_metric.update(targets, outputs)\n            seg_metric.update(targets, outputs)\n\n    det_metric.synchronize_results()\n    seg_metric.synchronize_results()\n    det_metric.evaluate()\n    seg_metric.evaluate()\n\n    save_info(det_metric.coco_evaluator, category_index, \"det_record_mAP.txt\")\n    save_info(seg_metric.coco_evaluator, category_index, \"seg_record_mAP.txt\")\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', type=int, default=90, help='number of classes')\n\n    # 数据集的根目录\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights-path', default='./save_weights/model_25.pth', type=str, 
help='training weights')\n\n    # batch size(set to 1, don't change)\n    parser.add_argument('--batch-size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n    # 类别索引和类别名称对应关系\n    parser.add_argument('--label-json-path', type=str, default=\"coco91_indices.json\")\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/README.md",
    "content": "# RetinaNet\n\n## 该项目主要是来自pytorch官方torchvision模块中的源码\n* https://github.com/pytorch/vision/tree/master/torchvision/models/detection\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)\n* pycocotools(Linux:`pip install pycocotools`; Windows:`pip install pycocotools-windows`(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n* 详细环境配置见`requirements.txt`\n\n## 文件结构：\n```\n  ├── backbone: 特征提取网络(ResNet50+FPN)\n  ├── network_files: RetinaNet网络\n  ├── train_utils: 训练验证相关模块（包括cocotools）\n  ├── my_dataset.py: 自定义dataset用于读取VOC数据集\n  ├── train.py: 以resnet50+FPN做为backbone进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n  └── pascal_voc_classes.json: pascal_voc标签文件(注意索引从0开始，不包括背景)\n```\n\n## 预训练权重下载地址（下载后放入backbone文件夹中）：\n* ResNet50+FPN backbone: https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth\n* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是`retinanet_resnet50_fpn_coco.pth`文件，\n  不是`retinanet_resnet50_fpn_coco-eeacb38b.pth`\n\n\n## 数据集，本例程使用的是PASCAL VOC2012数据集\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK\n* 基于迁移学习在PASCAL VOC2012训练集训练得到的权重： 链接: https://pan.baidu.com/s/1mqrBFWuJ_lfDloCfVjWqaA  密码: sw0t\n* 在PASCAL VOC2012验证集上结果：\n```\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.563\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.798\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.616\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.236\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.434\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.626\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  
1 ] = 0.486\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.688\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.707\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.421\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.604\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758\n```\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要单GPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 python -m torch.distributed.launch --nproc_per_node=2 --use_env train_multi_GPU.py`\n\n## 注意事项\n* 在使用训练脚本时，注意要将`--data-path`(VOC_root)设置为自己存放`VOCdevkit`文件夹所在的**根目录**\n* 由于带有FPN结构的RetinaNet很吃显存，如果GPU的显存不够(如果batch_size小于8的话)建议在create_model函数中使用默认的norm_layer，\n  即不传递norm_layer变量，默认去使用FrozenBatchNorm2d(即不会去更新参数的bn层),使用中发现效果也很好。\n* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。\n* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/backbone/__init__.py",
    "content": "from .feature_pyramid_network import FeaturePyramidNetwork, LastLevelP6P7, LastLevelMaxPool\nfrom .resnet50_fpn_model import resnet50_fpn_backbone\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/backbone/feature_pyramid_network.py",
    "content": "from collections import OrderedDict\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom torch.jit.annotations import Tuple, List, Dict\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n    Arguments:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model, return_layers):\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n        layers = OrderedDict()\n\n        # 遍历模型子模块按顺序存入有序字典\n        # 只保存layer4及其之前的结构，舍去之后不用的结构\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super().__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def 
forward(self, x):\n        out = OrderedDict()\n        # 依次遍历模型的所有子模块，并进行正向传播，\n        # 收集layer1, layer2, layer3, layer4的输出\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass BackboneWithFPN(nn.Module):\n    \"\"\"\n    Adds a FPN on top of a model.\n    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to\n    extract a submodel that returns the feature maps specified in return_layers.\n    The same limitations of IntermediatLayerGetter apply here.\n    Arguments:\n        backbone (nn.Module)\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n        in_channels_list (List[int]): number of channels for each feature map\n            that is returned, in the order they are present in the OrderedDict\n        out_channels (int): number of channels in the FPN.\n        extra_blocks: ExtraFPNBlock\n    Attributes:\n        out_channels (int): the number of channels in the FPN\n    \"\"\"\n\n    def __init__(self,\n                 backbone: nn.Module,\n                 return_layers=None,\n                 in_channels_list=None,\n                 out_channels=256,\n                 extra_blocks=None,\n                 re_getter=True):\n        super().__init__()\n\n        if extra_blocks is None:\n            extra_blocks = LastLevelMaxPool()\n\n        if re_getter:\n            assert return_layers is not None\n            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)\n        else:\n            self.body = backbone\n\n        self.fpn = FeaturePyramidNetwork(\n            
in_channels_list=in_channels_list,\n            out_channels=out_channels,\n            extra_blocks=extra_blocks,\n            )\n\n        self.out_channels = out_channels\n\n    def forward(self, x):\n        x = self.body(x)\n        x = self.fpn(x)\n        return x\n\n\nclass ExtraFPNBlock(nn.Module):\n    \"\"\"\n    Base class for the extra block in the FPN.\n\n    Args:\n        results (List[Tensor]): the result of the FPN\n        x (List[Tensor]): the original feature maps\n        names (List[str]): the names for each one of the\n            original feature maps\n\n    Returns:\n        results (List[Tensor]): the extended set of results\n            of the FPN\n        names (List[str]): the extended set of names for the results\n    \"\"\"\n    def forward(self,\n                results: List[Tensor],\n                x: List[Tensor],\n                names: List[str]) -> Tuple[List[Tensor], List[str]]:\n        pass\n\n\nclass LastLevelMaxPool(torch.nn.Module):\n    \"\"\"\n    Applies a max_pool2d on top of the last feature map\n    \"\"\"\n\n    def forward(self, x: List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:\n        names.append(\"pool\")\n        x.append(F.max_pool2d(x[-1], 1, 2, 0))\n        return x, names\n\n\nclass LastLevelP6P7(ExtraFPNBlock):\n    \"\"\"\n    This module is used in RetinaNet to generate extra layers, P6 and P7.\n    \"\"\"\n    def __init__(self, in_channels: int, out_channels: int):\n        super().__init__()\n        self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)\n        self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)\n        for module in [self.p6, self.p7]:\n            nn.init.kaiming_uniform_(module.weight, a=1)\n            nn.init.constant_(module.bias, 0)\n        self.use_P5 = in_channels == out_channels\n\n    def forward(self,\n                p: List[Tensor],\n                c: List[Tensor],\n                names: List[str]) -> 
Tuple[List[Tensor], List[str]]:\n        p5, c5 = p[-1], c[-1]\n        x = p5 if self.use_P5 else c5\n        p6 = self.p6(x)\n        p7 = self.p7(F.relu(p6))\n        p.extend([p6, p7])\n        names.extend([\"p6\", \"p7\"])\n        return p, names\n\n\nclass FeaturePyramidNetwork(nn.Module):\n    \"\"\"\n    Module that adds a FPN from on top of a set of feature maps. This is based on\n    `\"Feature Pyramid Network for Object Detection\" <https://arxiv.org/abs/1612.03144>`_.\n    The feature maps are currently supposed to be in increasing depth\n    order.\n    The input to the model is expected to be an OrderedDict[Tensor], containing\n    the feature maps on top of which the FPN will be added.\n    Arguments:\n        in_channels_list (list[int]): number of channels for each feature map that\n            is passed to the module\n        out_channels (int): number of channels of the FPN representation\n        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will\n            be performed. 
It is expected to take the fpn features, the original\n            features and the names of the original features as input, and returns\n            a new list of feature maps and their corresponding names\n    \"\"\"\n\n    def __init__(self, in_channels_list, out_channels, extra_blocks=None):\n        super().__init__()\n        # 用来调整resnet特征矩阵(layer1,2,3,4)的channel（kernel_size=1）\n        self.inner_blocks = nn.ModuleList()\n        # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵\n        self.layer_blocks = nn.ModuleList()\n        for in_channels in in_channels_list:\n            if in_channels == 0:\n                continue\n            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)\n            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)\n            self.inner_blocks.append(inner_block_module)\n            self.layer_blocks.append(layer_block_module)\n\n        # initialize parameters now to avoid modifying the initialization of top_blocks\n        for m in self.children():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_uniform_(m.weight, a=1)\n                nn.init.constant_(m.bias, 0)\n\n        self.extra_blocks = extra_blocks\n\n    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.inner_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.inner_blocks)\n        if idx < 0:\n            idx += num_blocks\n        i = 0\n        out = x\n        for module in self.inner_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.layer_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.layer_blocks)\n        if idx < 0:\n          
  idx += num_blocks\n        i = 0\n        out = x\n        for module in self.layer_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:\n        \"\"\"\n        Computes the FPN for a set of feature maps.\n        Arguments:\n            x (OrderedDict[Tensor]): feature maps for each feature level.\n        Returns:\n            results (OrderedDict[Tensor]): feature maps after FPN layers.\n                They are ordered from highest resolution first.\n        \"\"\"\n        # unpack OrderedDict into two lists for easier handling\n        names = list(x.keys())\n        x = list(x.values())\n\n        # 将resnet layer4的channel调整到指定的out_channels\n        # last_inner = self.inner_blocks[-1](x[-1])\n        last_inner = self.get_result_from_inner_blocks(x[-1], -1)\n        # result中保存着每个预测特征层\n        results = []\n        # 将layer4调整channel后的特征矩阵，通过3x3卷积后得到对应的预测特征矩阵\n        # results.append(self.layer_blocks[-1](last_inner))\n        results.append(self.get_result_from_layer_blocks(last_inner, -1))\n\n        for idx in range(len(x) - 2, -1, -1):\n            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)\n            feat_shape = inner_lateral.shape[-2:]\n            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode=\"nearest\")\n            last_inner = inner_lateral + inner_top_down\n            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))\n\n        # 在layer4对应的预测特征层基础上生成预测特征矩阵5\n        if self.extra_blocks is not None:\n            results, names = self.extra_blocks(results, x, names)\n\n        # make it back an OrderedDict\n        out = OrderedDict([(k, v) for k, v in zip(names, results)])\n\n        return out\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/backbone/resnet50_fpn_model.py",
    "content": "import os\n\nimport torch.nn as nn\nimport torch\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nfrom .feature_pyramid_network import LastLevelMaxPool, BackboneWithFPN\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = norm_layer(out_channel * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n  
      self._norm_layer = norm_layer\n\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = norm_layer(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        norm_layer = self._norm_layer\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                norm_layer(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample,\n                            stride=stride, norm_layer=norm_layer))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        
x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef overwrite_eps(model, eps):\n    \"\"\"\n    This method overwrites the default eps values of all the\n    FrozenBatchNorm2d layers of the model with the provided value.\n    This is necessary to address the BC-breaking change introduced\n    by the bug-fix at pytorch/vision#2933. The overwrite is applied\n    only when the pretrained weights are loaded to maintain compatibility\n    with previous versions.\n\n    Args:\n        model (nn.Module): The model on which we perform the overwrite.\n        eps (float): The new value of eps.\n    \"\"\"\n    for module in model.modules():\n        if isinstance(module, FrozenBatchNorm2d):\n            module.eps = eps\n\n\ndef resnet50_fpn_backbone(pretrain_path=\"\",\n                          norm_layer=FrozenBatchNorm2d,   # FrozenBatchNorm2d的功能与BatchNorm2d类似，但参数无法更新\n                          trainable_layers=3,\n                          returned_layers=None,\n                          extra_blocks=None):\n    \"\"\"\n    搭建resnet50_fpn——backbone\n    Args:\n        pretrain_path: resnet50的预训练权重，如果不使用就默认为空\n        norm_layer: 官方默认的是FrozenBatchNorm2d，即不会更新参数的bn层(因为如果batch_size设置的很小会导致效果更差，还不如不用bn层)\n                    如果自己的GPU显存很大可以设置很大的batch_size，那么自己可以传入正常的BatchNorm2d层\n                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)\n        trainable_layers: 指定训练哪些层结构\n        returned_layers: 指定哪些层的输出需要返回\n        extra_blocks: 在输出的特征层基础上额外添加的层结构\n\n    Returns:\n\n    \"\"\"\n    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],\n                             include_top=False,\n                             norm_layer=norm_layer)\n\n    if isinstance(norm_layer, FrozenBatchNorm2d):\n        overwrite_eps(resnet_backbone, 0.0)\n\n   
 if pretrain_path != \"\":\n        assert os.path.exists(pretrain_path), \"{} is not exist.\".format(pretrain_path)\n        # 载入预训练权重\n        print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False))\n\n    # select layers that won't be frozen\n    assert 0 <= trainable_layers <= 5\n    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]\n\n    # 如果要训练所有层结构的话，不要忘了conv1后还有一个bn1\n    if trainable_layers == 5:\n        layers_to_train.append(\"bn1\")\n\n    # freeze layers\n    for name, parameter in resnet_backbone.named_parameters():\n        # freeze every parameter whose name does not start with a layer in layers_to_train\n        if all([not name.startswith(layer) for layer in layers_to_train]):\n            parameter.requires_grad_(False)\n\n    if extra_blocks is None:\n        extra_blocks = LastLevelMaxPool()\n\n    if returned_layers is None:\n        returned_layers = [1, 2, 3, 4]\n    # every returned layer index must be greater than 0 and less than 5\n    assert min(returned_layers) > 0 and max(returned_layers) < 5\n\n    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}\n    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}\n\n    # in_channel 为layer4的输出特征矩阵channel = 2048\n    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256\n    # 记录resnet50提供给fpn的特征层channels\n    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]\n    # 通过fpn后得到的每个特征层的channel\n    out_channels = 256\n    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/my_dataset.py",
    "content": "from torch.utils.data import Dataset\nimport os\nimport torch\nimport json\nfrom PIL import Image\nfrom lxml import etree\n\n\nclass VOCDataSet(Dataset):\n    \"\"\"读取解析PASCAL VOC2007/2012数据集\"\"\"\n\n    def __init__(self, voc_root, year=\"2012\", transforms=None, txt_name: str = \"train.txt\"):\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        # 增加容错能力\n        if \"VOCdevkit\" in voc_root:\n            self.root = os.path.join(voc_root, f\"VOC{year}\")\n        else:\n            self.root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        self.img_root = os.path.join(self.root, \"JPEGImages\")\n        self.annotations_root = os.path.join(self.root, \"Annotations\")\n\n        # read train.txt or val.txt file\n        txt_path = os.path.join(self.root, \"ImageSets\", \"Main\", txt_name)\n        assert os.path.exists(txt_path), \"not found {} file.\".format(txt_name)\n\n        with open(txt_path) as read:\n            self.xml_list = [os.path.join(self.annotations_root, line.strip() + \".xml\")\n                             for line in read.readlines() if len(line.strip()) > 0]\n\n        # check file\n        assert len(self.xml_list) > 0, \"in '{}' file does not find any information.\".format(txt_path)\n        for xml_path in self.xml_list:\n            assert os.path.exists(xml_path), \"not found '{}' file.\".format(xml_path)\n\n        # read class_indict\n        json_file = './pascal_voc_classes.json'\n        assert os.path.exists(json_file), \"{} file not exist.\".format(json_file)\n        with open(json_file, 'r') as f:\n            self.class_dict = json.load(f)\n\n        self.transforms = transforms\n\n    def __len__(self):\n        return len(self.xml_list)\n\n    def __getitem__(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data 
= self.parse_xml_to_dict(xml)[\"annotation\"]\n        img_path = os.path.join(self.img_root, data[\"filename\"])\n        image = Image.open(img_path)\n        if image.format != \"JPEG\":\n            raise ValueError(\"Image '{}' format not JPEG\".format(img_path))\n\n        boxes = []\n        labels = []\n        iscrowd = []\n        assert \"object\" in data, \"{} lack of object information.\".format(xml_path)\n        for obj in data[\"object\"]:\n            xmin = float(obj[\"bndbox\"][\"xmin\"])\n            xmax = float(obj[\"bndbox\"][\"xmax\"])\n            ymin = float(obj[\"bndbox\"][\"ymin\"])\n            ymax = float(obj[\"bndbox\"][\"ymax\"])\n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            if \"difficult\" in obj:\n                iscrowd.append(int(obj[\"difficult\"]))\n            else:\n                iscrowd.append(0)\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        if self.transforms is not None:\n            image, target = self.transforms(image, target)\n\n        return image, target\n\n    def get_height_and_width(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = 
int(data[\"size\"][\"width\"])\n        return data_height, data_width\n\n    def parse_xml_to_dict(self, xml):\n        \"\"\"\n        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n        Args:\n            xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n        Returns:\n            Python dictionary holding XML contents.\n        \"\"\"\n\n        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n            return {xml.tag: xml.text}\n\n        result = {}\n        for child in xml:\n            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息\n            if child.tag != 'object':\n                result[child.tag] = child_result[child.tag]\n            else:\n                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                    result[child.tag] = []\n                result[child.tag].append(child_result[child.tag])\n        return {xml.tag: result}\n\n    def coco_index(self, idx):\n        \"\"\"\n        该方法是专门为pycocotools统计标签信息准备，不对图像和标签作任何处理\n        由于不用去读取图片，可大幅缩减统计时间\n\n        Args:\n            idx: 输入需要获取图像的索引\n        \"\"\"\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        # img_path = os.path.join(self.img_root, data[\"filename\"])\n        # image = Image.open(img_path)\n        # if image.format != \"JPEG\":\n        #     raise ValueError(\"Image format not JPEG\")\n        boxes = []\n        labels = []\n        iscrowd = []\n        for obj in data[\"object\"]:\n            xmin = float(obj[\"bndbox\"][\"xmin\"])\n            xmax = float(obj[\"bndbox\"][\"xmax\"])\n            ymin = float(obj[\"bndbox\"][\"ymin\"])\n            ymax = float(obj[\"bndbox\"][\"ymax\"])\n\n            # 
进一步检查数据，有的标注信息中可能有w或h为0的情况，这样的数据会导致计算回归loss为nan\n            if xmax <= xmin or ymax <= ymin:\n                print(\"Warning: in '{}' xml, there are some bbox w/h <=0\".format(xml_path))\n                continue\n                \n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            iscrowd.append(int(obj[\"difficult\"]))\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        return (data_height, data_width), target\n\n    @staticmethod\n    def collate_fn(batch):\n        return tuple(zip(*batch))\n\n# import transforms\n# from draw_box_utils import draw_objs\n# from PIL import Image\n# import json\n# import matplotlib.pyplot as plt\n# import torchvision.transforms as ts\n# import random\n#\n# # read class_indict\n# category_index = {}\n# try:\n#     json_file = open('./pascal_voc_classes.json', 'r')\n#     class_dict = json.load(json_file)\n#     category_index = {str(v): str(k) for k, v in class_dict.items()}\n# except Exception as e:\n#     print(e)\n#     exit(-1)\n#\n# data_transform = {\n#     \"train\": transforms.Compose([transforms.ToTensor(),\n#                                  transforms.RandomHorizontalFlip(0.5)]),\n#     \"val\": transforms.Compose([transforms.ToTensor()])\n# }\n#\n# # load train data set\n# train_data_set = VOCDataSet(os.getcwd(), \"2012\", data_transform[\"train\"], \"train.txt\")\n# print(len(train_data_set))\n# for index in random.sample(range(0, 
len(train_data_set)), k=5):\n#     img, target = train_data_set[index]\n#     img = ts.ToPILImage()(img)\n#     plot_img = draw_objs(img,\n#                          target[\"boxes\"].numpy(),\n#                          target[\"labels\"].numpy(),\n#                          np.ones(target[\"labels\"].shape[0]),\n#                          category_index=category_index,\n#                          box_thresh=0.5,\n#                          line_thickness=3,\n#                          font='arial.ttf',\n#                          font_size=20)\n#     plt.imshow(plot_img)\n#     plt.show()\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/__init__.py",
    "content": "from .retinanet import RetinaNet\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/anchor_utils.py",
    "content": "from typing import List, Optional, Dict\n\nimport torch\nfrom torch import nn, Tensor\n\nfrom .image_list import ImageList\n\n\nclass AnchorsGenerator(nn.Module):\n    __annotations__ = {\n        \"cell_anchors\": Optional[List[torch.Tensor]],\n        \"_cache\": Dict[str, List[torch.Tensor]]\n    }\n\n    \"\"\"\n    anchors生成器\n    Module that generates anchors for a set of feature maps and\n    image sizes.\n\n    The module support computing anchors at multiple sizes and aspect ratios\n    per feature map.\n\n    sizes and aspect_ratios should have the same number of elements, and it should\n    correspond to the number of feature maps.\n\n    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,\n    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors\n    per spatial location for feature map i.\n\n    Arguments:\n        sizes (Tuple[Tuple[int]]):\n        aspect_ratios (Tuple[Tuple[float]]):\n    \"\"\"\n\n    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):\n        super(AnchorsGenerator, self).__init__()\n\n        if not isinstance(sizes[0], (list, tuple)):\n            # TODO change this\n            sizes = tuple((s,) for s in sizes)\n        if not isinstance(aspect_ratios[0], (list, tuple)):\n            aspect_ratios = (aspect_ratios,) * len(sizes)\n\n        assert len(sizes) == len(aspect_ratios)\n\n        self.sizes = sizes\n        self.aspect_ratios = aspect_ratios\n        self.cell_anchors = None\n        self._cache = {}\n\n    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device(\"cpu\")):\n        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor\n        \"\"\"\n        compute anchor sizes\n        Arguments:\n            scales: sqrt(anchor_area)\n            aspect_ratios: h/w ratios\n            dtype: float32\n            device: cpu/gpu\n        \"\"\"\n        scales = 
torch.as_tensor(scales, dtype=dtype, device=device)\n        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)\n        h_ratios = torch.sqrt(aspect_ratios)\n        w_ratios = 1.0 / h_ratios\n\n        # [r1, r2, r3]' * [s1, s2, s3]\n        # number of elements is len(ratios)*len(scales)\n        ws = (w_ratios[:, None] * scales[None, :]).view(-1)\n        hs = (h_ratios[:, None] * scales[None, :]).view(-1)\n\n        # left-top, right-bottom coordinate relative to anchor center(0, 0)\n        # 生成的anchors模板都是以（0, 0）为中心的, shape [len(ratios)*len(scales), 4]\n        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2\n\n        return base_anchors.round()  # round 四舍五入\n\n    def set_cell_anchors(self, dtype, device):\n        # type: (torch.dtype, torch.device) -> None\n        if self.cell_anchors is not None:\n            cell_anchors = self.cell_anchors\n            assert cell_anchors is not None\n            # suppose that all anchors have the same device\n            # which is a valid assumption in the current state of the codebase\n            if cell_anchors[0].device == device:\n                return\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        # anchors模板都是以(0, 0)为中心的anchor\n        cell_anchors = [\n            self.generate_anchors(sizes, aspect_ratios, dtype, device)\n            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)\n        ]\n        self.cell_anchors = cell_anchors\n\n    def num_anchors_per_location(self):\n        # 计算每个预测特征层上每个滑动窗口的预测目标数\n        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]\n\n    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),\n    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.\n    def grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"\n        anchors 
position in grid coordinate axis map into origin image\n        计算预测特征图对应原始图像上的所有anchors的坐标\n        Args:\n            grid_sizes: 预测特征矩阵的height和width\n            strides: 预测特征矩阵上一步对应原始图像上的步距\n        \"\"\"\n        anchors = []\n        cell_anchors = self.cell_anchors\n        assert cell_anchors is not None\n\n        # 遍历每个预测特征层的grid_size，strides和cell_anchors\n        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):\n            grid_height, grid_width = size\n            stride_height, stride_width = stride\n            device = base_anchors.device\n\n            # For output anchor, compute [x_center, y_center, x_center, y_center]\n            # shape: [grid_width] 对应原图上的x坐标(列)\n            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width\n            # shape: [grid_height] 对应原图上的y坐标(行)\n            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height\n\n            # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量)\n            # torch.meshgrid函数分别传入行坐标和列坐标，生成网格行坐标矩阵和网格列坐标矩阵\n            # shape: [grid_height, grid_width]\n            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n            shift_x = shift_x.reshape(-1)\n            shift_y = shift_y.reshape(-1)\n\n            # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量\n            # shape: [grid_width*grid_height, 4]\n            shifts = torch.stack([shift_x, shift_y, shift_x, shift_y], dim=1)\n\n            # For every (base anchor, output anchor) pair,\n            # offset each zero-centered base anchor by the center of the output anchor.\n            # 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制)\n            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)\n            anchors.append(shifts_anchor.reshape(-1, 4))\n\n        return anchors  # List[Tensor(all_num_anchors, 4)]\n\n    def cached_grid_anchors(self, grid_sizes, strides):\n        # type: 
(List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"将计算得到的所有anchors信息进行缓存\"\"\"\n        key = str(grid_sizes) + str(strides)\n        # self._cache是字典类型\n        if key in self._cache:\n            return self._cache[key]\n        anchors = self.grid_anchors(grid_sizes, strides)\n        self._cache[key] = anchors\n        return anchors\n\n    def forward(self, image_list, feature_maps):\n        # type: (ImageList, List[Tensor]) -> List[Tensor]\n        # 获取每个预测特征层的尺寸(height, width)\n        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])\n\n        # 获取输入图像的height和width\n        image_size = image_list.tensors.shape[-2:]\n\n        # 获取变量类型和设备类型\n        dtype, device = feature_maps[0].dtype, feature_maps[0].device\n\n        # one step in feature map equate n pixel stride in origin image\n        # 计算特征层上的一步等于原始图像上的步长\n        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),\n                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        self.set_cell_anchors(dtype, device)\n\n        # 计算/读取所有anchors的坐标信息（这里的anchors信息是映射到原图上的所有anchors信息，不是anchors模板）\n        # 得到的是一个list列表，对应每张预测特征图映射回原图的anchors坐标信息\n        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)\n\n        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])\n        # 遍历一个batch中的每张图像\n        for i, (image_height, image_width) in enumerate(image_list.image_sizes):\n            anchors_in_image = []\n            # 遍历每张预测特征图映射回原图的anchors坐标信息\n            for anchors_per_feature_map in anchors_over_all_feature_maps:\n                anchors_in_image.append(anchors_per_feature_map)\n            anchors.append(anchors_in_image)\n        # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起\n        # anchors是个list，每个元素为一张图像的所有anchors信息\n        anchors = [torch.cat(anchors_per_image) for anchors_per_image 
in anchors]\n        # Clear the cache to avoid memory leaks.\n        self._cache.clear()\n        return anchors\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/boxes.py",
    "content": "import torch\nfrom typing import Tuple\nfrom torch import Tensor\nimport torchvision\n\n\ndef nms(boxes, scores, iou_threshold):\n    # type: (Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression (NMS) on the boxes according\n    to their intersection-over-union (IoU).\n\n    NMS iteratively removes lower scoring boxes which have an\n    IoU greater than iou_threshold with another (higher scoring)\n    box.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4])\n        boxes to perform NMS on. They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    iou_threshold : float\n        discards all overlapping\n        boxes with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices\n        of the elements that have been kept\n        by NMS, sorted in decreasing order of scores\n    \"\"\"\n    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n\n\ndef batched_nms(boxes, scores, idxs, iou_threshold):\n    # type: (Tensor, Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression in a batched fashion.\n\n    Each index value correspond to a category, and NMS\n    will not be applied between elements of different categories.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes where NMS will be performed. 
They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    idxs : Tensor[N]\n        indices of the categories for each one of the boxes.\n    iou_threshold : float\n        discards all overlapping boxes\n        with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices of\n        the elements that have been kept by NMS, sorted\n        in decreasing order of scores\n    \"\"\"\n    if boxes.numel() == 0:\n        return torch.empty((0,), dtype=torch.int64, device=boxes.device)\n\n    # strategy: in order to perform NMS independently per class.\n    # we add an offset to all the boxes. The offset is dependent\n    # only on the class idx, and is large enough so that boxes\n    # from different classes do not overlap\n    # 获取所有boxes中最大的坐标值（xmin, ymin, xmax, ymax）\n    max_coordinate = boxes.max()\n\n    # to(): Performs Tensor dtype and/or device conversion\n    # 为每一个类别/每一层生成一个很大的偏移量\n    # 这里的to只是让生成tensor的dtype和device与boxes保持一致\n    offsets = idxs.to(boxes) * (max_coordinate + 1)\n    # boxes加上对应层的偏移量后，保证不同类别/层之间boxes不会有重合的现象\n    boxes_for_nms = boxes + offsets[:, None]\n    keep = nms(boxes_for_nms, scores, iou_threshold)\n    return keep\n\n\ndef remove_small_boxes(boxes, min_size):\n    # type: (Tensor, float) -> Tensor\n    \"\"\"\n    Remove boxes that contain at least one side smaller than min_size.\n    移除宽高小于指定阈值的索引\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        min_size (float): minimum size\n\n    Returns:\n        keep (Tensor[K]): indices of the boxes that have both sides\n            greater than or equal to min_size\n    \"\"\"\n    ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # 预测boxes的宽和高\n    # keep = (ws >= min_size) & (hs >= min_size)  # 当满足宽，高都大于给定阈值时为True\n    keep = torch.logical_and(torch.ge(ws, min_size), torch.ge(hs, min_size))\n    # nonzero(): Returns a tensor 
containing the indices of all non-zero elements of input\n    # keep = keep.nonzero().squeeze(1)\n    keep = torch.where(keep)[0]\n    return keep\n\n\ndef clip_boxes_to_image(boxes, size):\n    # type: (Tensor, Tuple[int, int]) -> Tensor\n    \"\"\"\n    Clip boxes so that they lie inside an image of size `size`.\n    裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        size (Tuple[height, width]): size of the image\n\n    Returns:\n        clipped_boxes (Tensor[N, 4])\n    \"\"\"\n    dim = boxes.dim()\n    boxes_x = boxes[..., 0::2]  # x1, x2\n    boxes_y = boxes[..., 1::2]  # y1, y2\n    height, width = size\n\n    if torchvision._is_tracing():\n        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))\n    else:\n        boxes_x = boxes_x.clamp(min=0, max=width)   # 限制x坐标范围在[0,width]之间\n        boxes_y = boxes_y.clamp(min=0, max=height)  # 限制y坐标范围在[0,height]之间\n\n    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)\n    return clipped_boxes.reshape(boxes.shape)\n\n\ndef box_area(boxes):\n    \"\"\"\n    Computes the area of a set of bounding boxes, which are specified by its\n    (x1, y1, x2, y2) coordinates.\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes for which the area will be computed. 
They\n            are expected to be in (x1, y1, x2, y2) format\n\n    Returns:\n        area (Tensor[N]): area for each box\n    \"\"\"\n    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n\n\ndef box_iou(boxes1, boxes2):\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n\n    Arguments:\n        boxes1 (Tensor[N, 4])\n        boxes2 (Tensor[M, 4])\n\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n    area1 = box_area(boxes1)\n    area2 = box_area(boxes2)\n\n    #  When the shapes do not match,\n    #  the shape of the returned output tensor follows the broadcasting rules\n    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # left-top [N,M,2]\n    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # right-bottom [N,M,2]\n\n    wh = (rb - lt).clamp(min=0)  # [N,M,2]\n    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]\n\n    iou = inter / (area1[:, None] + area2 - inter)\n    return iou\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/det_utils.py",
    "content": "import torch\nimport math\nfrom typing import List, Tuple\nfrom torch import Tensor\n\n\nclass BalancedPositiveNegativeSampler(object):\n    \"\"\"\n    This class samples batches, ensuring that they contain a fixed proportion of positives\n    \"\"\"\n\n    def __init__(self, batch_size_per_image, positive_fraction):\n        # type: (int, float) -> None\n        \"\"\"\n        Arguments:\n            batch_size_per_image (int): number of elements to be selected per image\n            positive_fraction (float): percentage of positive elements per batch\n        \"\"\"\n        self.batch_size_per_image = batch_size_per_image\n        self.positive_fraction = positive_fraction\n\n    def __call__(self, matched_idxs):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        Arguments:\n            matched idxs: list of tensors containing -1, 0 or positive values.\n                Each tensor corresponds to a specific image.\n                -1 values are ignored, 0 are considered as negatives and > 0 as\n                positives.\n\n        Returns:\n            pos_idx (list[tensor])\n            neg_idx (list[tensor])\n\n        Returns two lists of binary masks for each image.\n        The first list contains the positive elements that were selected,\n        and the second list the negative example.\n        \"\"\"\n        pos_idx = []\n        neg_idx = []\n        # 遍历每张图像的matched_idxs\n        for matched_idxs_per_image in matched_idxs:\n            # >= 1的为正样本, nonzero返回非零元素索引\n            # positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)\n            positive = torch.where(torch.ge(matched_idxs_per_image, 1))[0]\n            # = 0的为负样本\n            # negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)\n            negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]\n\n            # 指定正样本的数量\n            num_pos = int(self.batch_size_per_image * 
self.positive_fraction)\n            # protect against not enough positive examples\n            # 如果正样本数量不够就直接采用所有正样本\n            num_pos = min(positive.numel(), num_pos)\n            # 指定负样本数量\n            num_neg = self.batch_size_per_image - num_pos\n            # protect against not enough negative examples\n            # 如果负样本数量不够就直接采用所有负样本\n            num_neg = min(negative.numel(), num_neg)\n\n            # randomly select positive and negative examples\n            # Returns a random permutation of integers from 0 to n - 1.\n            # 随机选择指定数量的正负样本\n            perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]\n            perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]\n\n            pos_idx_per_image = positive[perm1]\n            neg_idx_per_image = negative[perm2]\n\n            # create binary mask from indices\n            pos_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n            neg_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n\n            pos_idx_per_image_mask[pos_idx_per_image] = 1\n            neg_idx_per_image_mask[neg_idx_per_image] = 1\n\n            pos_idx.append(pos_idx_per_image_mask)\n            neg_idx.append(neg_idx_per_image_mask)\n\n        return pos_idx, neg_idx\n\n\n@torch.jit._script_if_tracing\ndef encode_boxes(reference_boxes, proposals, weights):\n    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n    \"\"\"\n    Encode a set of proposals with respect to some\n    reference boxes\n\n    Arguments:\n        reference_boxes (Tensor): reference boxes(gt)\n        proposals (Tensor): boxes to be encoded(anchors)\n        weights:\n    \"\"\"\n\n    # perform some unpacking to make it JIT-fusion friendly\n    wx = weights[0]\n    wy = weights[1]\n    ww = weights[2]\n    wh = weights[3]\n\n    # unsqueeze()\n    # 
Returns a new tensor with a dimension of size one inserted at the specified position.\n    proposals_x1 = proposals[:, 0].unsqueeze(1)\n    proposals_y1 = proposals[:, 1].unsqueeze(1)\n    proposals_x2 = proposals[:, 2].unsqueeze(1)\n    proposals_y2 = proposals[:, 3].unsqueeze(1)\n\n    reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)\n    reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)\n    reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)\n    reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)\n\n    # implementation starts here\n    # parse widths and heights\n    ex_widths = proposals_x2 - proposals_x1\n    ex_heights = proposals_y2 - proposals_y1\n    # parse coordinate of center point\n    ex_ctr_x = proposals_x1 + 0.5 * ex_widths\n    ex_ctr_y = proposals_y1 + 0.5 * ex_heights\n\n    gt_widths = reference_boxes_x2 - reference_boxes_x1\n    gt_heights = reference_boxes_y2 - reference_boxes_y1\n    gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths\n    gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights\n\n    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths\n    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights\n    targets_dw = ww * torch.log(gt_widths / ex_widths)\n    targets_dh = wh * torch.log(gt_heights / ex_heights)\n\n    targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)\n    return targets\n\n\nclass BoxCoder(object):\n    \"\"\"\n    This class encodes and decodes a set of bounding boxes into\n    the representation used for training the regressors.\n    \"\"\"\n\n    def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)):\n        # type: (Tuple[float, float, float, float], float) -> None\n        \"\"\"\n        Arguments:\n            weights (4-element tuple)\n            bbox_xform_clip (float)\n        \"\"\"\n        self.weights = weights\n        self.bbox_xform_clip = bbox_xform_clip\n\n    def encode(self, reference_boxes, proposals):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        结合anchors和与之对应的gt计算regression参数\n        Args:\n            reference_boxes: List[Tensor] 每个proposal/anchor对应的gt_boxes\n            proposals: List[Tensor] anchors/proposals\n\n        Returns: regression parameters\n\n        \"\"\"\n        # 统计每张图像的anchors个数，方便后面拼接在一起处理后在分开\n        # reference_boxes和proposal数据结构相同\n        boxes_per_image = [len(b) for b in reference_boxes]\n        reference_boxes = torch.cat(reference_boxes, dim=0)\n        proposals = torch.cat(proposals, dim=0)\n\n        # targets_dx, targets_dy, targets_dw, targets_dh\n        targets = self.encode_single(reference_boxes, proposals)\n        return targets.split(boxes_per_image, 0)\n\n    def encode_single(self, reference_boxes, proposals):\n        \"\"\"\n        Encode a set of proposals with respect to some\n        reference boxes\n\n        Arguments:\n            reference_boxes (Tensor): reference boxes\n            proposals (Tensor): boxes to be encoded\n        \"\"\"\n        dtype = reference_boxes.dtype\n        device = reference_boxes.device\n        weights = torch.as_tensor(self.weights, dtype=dtype, device=device)\n        targets = encode_boxes(reference_boxes, proposals, weights)\n\n        return targets\n\n    def decode(self, rel_codes, boxes):\n        # type: (Tensor, List[Tensor]) -> Tensor\n        \"\"\"\n\n        Args:\n            rel_codes: bbox regression parameters\n            boxes: anchors/proposals\n\n        Returns:\n\n        \"\"\"\n        assert isinstance(boxes, (list, tuple))\n        assert isinstance(rel_codes, 
torch.Tensor)\n        boxes_per_image = [b.size(0) for b in boxes]\n        concat_boxes = torch.cat(boxes, dim=0)\n\n        box_sum = 0\n        for val in boxes_per_image:\n            box_sum += val\n\n        # 将预测的bbox回归参数应用到对应anchors上得到预测bbox的坐标\n        pred_boxes = self.decode_single(\n            rel_codes, concat_boxes\n        )\n\n        if box_sum > 0:\n            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)\n\n        return pred_boxes\n\n    def decode_single(self, rel_codes, boxes):\n        \"\"\"\n        From a set of original boxes and encoded relative box offsets,\n        get the decoded boxes.\n\n        Arguments:\n            rel_codes (Tensor): encoded boxes (bbox regression parameters)\n            boxes (Tensor): reference boxes (anchors/proposals)\n        \"\"\"\n        boxes = boxes.to(rel_codes.dtype)\n\n        # xmin, ymin, xmax, ymax\n        widths = boxes[:, 2] - boxes[:, 0]   # anchor/proposal宽度\n        heights = boxes[:, 3] - boxes[:, 1]  # anchor/proposal高度\n        ctr_x = boxes[:, 0] + 0.5 * widths   # anchor/proposal中心x坐标\n        ctr_y = boxes[:, 1] + 0.5 * heights  # anchor/proposal中心y坐标\n\n        wx, wy, ww, wh = self.weights  # RPN中为[1,1,1,1], fastrcnn中为[10,10,5,5]\n        dx = rel_codes[:, 0::4] / wx   # 预测anchors/proposals的中心坐标x回归参数\n        dy = rel_codes[:, 1::4] / wy   # 预测anchors/proposals的中心坐标y回归参数\n        dw = rel_codes[:, 2::4] / ww   # 预测anchors/proposals的宽度回归参数\n        dh = rel_codes[:, 3::4] / wh   # 预测anchors/proposals的高度回归参数\n\n        # limit max value, prevent sending too large values into torch.exp()\n        # self.bbox_xform_clip=math.log(1000. 
/ 16)   4.135\n        dw = torch.clamp(dw, max=self.bbox_xform_clip)\n        dh = torch.clamp(dh, max=self.bbox_xform_clip)\n\n        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]\n        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]\n        pred_w = torch.exp(dw) * widths[:, None]\n        pred_h = torch.exp(dh) * heights[:, None]\n\n        # xmin\n        pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymin\n        pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n        # xmax\n        pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymax\n        pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n\n        pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)\n        return pred_boxes\n\n\nclass Matcher(object):\n    BELOW_LOW_THRESHOLD = -1\n    BETWEEN_THRESHOLDS = -2\n\n    __annotations__ = {\n        'BELOW_LOW_THRESHOLD': int,\n        'BETWEEN_THRESHOLDS': int,\n    }\n\n    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):\n        # type: (float, float, bool) -> None\n        \"\"\"\n        Args:\n            high_threshold (float): quality values greater than or equal to\n                this value are candidate matches.\n            low_threshold (float): a lower quality threshold used to stratify\n                matches into three levels:\n                1) matches >= high_threshold\n                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)\n                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)\n            allow_low_quality_matches (bool): if True, produce additional matches\n                for predictions that have only low-quality match candidates. 
See\n                set_low_quality_matches_ for more details.\n        \"\"\"\n        self.BELOW_LOW_THRESHOLD = -1\n        self.BETWEEN_THRESHOLDS = -2\n        assert low_threshold <= high_threshold\n        self.high_threshold = high_threshold  # 0.7\n        self.low_threshold = low_threshold    # 0.3\n        self.allow_low_quality_matches = allow_low_quality_matches\n\n    def __call__(self, match_quality_matrix):\n        \"\"\"\n        计算anchors与每个gtboxes匹配的iou最大值，并记录索引，\n        iou<low_threshold索引值为-1， low_threshold<=iou<high_threshold索引值为-2\n        Args:\n            match_quality_matrix (Tensor[float]): an MxN tensor, containing the\n            pairwise quality between M ground-truth elements and N predicted elements.\n\n        Returns:\n            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in\n            [0, M - 1] or a negative value indicating that prediction i could not\n            be matched.\n        \"\"\"\n        if match_quality_matrix.numel() == 0:\n            # empty targets or proposals not supported during training\n            if match_quality_matrix.shape[0] == 0:\n                raise ValueError(\n                    \"No ground-truth boxes available for one of the images \"\n                    \"during training\")\n            else:\n                raise ValueError(\n                    \"No proposal boxes available for one of the images \"\n                    \"during training\")\n\n        # match_quality_matrix is M (gt) x N (predicted)\n        # Max over gt elements (dim 0) to find best gt candidate for each prediction\n        # M x N 的每一列代表一个anchors与所有gt的匹配iou值\n        # matched_vals代表每列的最大值，即每个anchors与所有gt匹配的最大iou值\n        # matches对应最大值所在的索引\n        matched_vals, matches = match_quality_matrix.max(dim=0)  # the dimension to reduce.\n        if self.allow_low_quality_matches:\n            all_matches = matches.clone()\n        else:\n            all_matches = None\n\n        # Assign 
candidate matches with low quality to negative (unassigned) values\n        # 计算iou小于low_threshold的索引\n        below_low_threshold = matched_vals < self.low_threshold\n        # 计算iou在low_threshold与high_threshold之间的索引值\n        between_thresholds = (matched_vals >= self.low_threshold) & (\n            matched_vals < self.high_threshold\n        )\n        # iou小于low_threshold的matches索引置为-1\n        matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD  # -1\n\n        # iou在[low_threshold, high_threshold]之间的matches索引置为-2\n        matches[between_thresholds] = self.BETWEEN_THRESHOLDS    # -2\n\n        if self.allow_low_quality_matches:\n            assert all_matches is not None\n            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)\n\n        return matches\n\n    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):\n        \"\"\"\n        Produce additional matches for predictions that have only low-quality matches.\n        Specifically, for each ground-truth find the set of predictions that have\n        maximum overlap with it (including ties); for each prediction in that set, if\n        it is unmatched, then match it to the ground-truth with which it has the highest\n        quality value.\n        \"\"\"\n        # For each gt, find the prediction with which it has highest quality\n        # 对于每个gt boxes寻找与其iou最大的anchor，\n        # highest_quality_foreach_gt为匹配到的最大iou值\n        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)  # the dimension to reduce.\n\n        # Find highest quality match available, even if it is low, including ties\n        # 寻找每个gt boxes与其iou最大的anchor索引，一个gt匹配到的最大iou可能有多个anchor\n        # gt_pred_pairs_of_highest_quality = torch.nonzero(\n        #     match_quality_matrix == highest_quality_foreach_gt[:, None]\n        # )\n        gt_pred_pairs_of_highest_quality = torch.where(\n            torch.eq(match_quality_matrix, highest_quality_foreach_gt[:, 
None])\n        )\n        # Example gt_pred_pairs_of_highest_quality:\n        #   tensor([[    0, 39796],\n        #           [    1, 32055],\n        #           [    1, 32070],\n        #           [    2, 39190],\n        #           [    2, 40255],\n        #           [    3, 40390],\n        #           [    3, 41455],\n        #           [    4, 45470],\n        #           [    5, 45325],\n        #           [    5, 46390]])\n        # Each row is a (gt index, prediction index)\n        # Note how gt items 1, 2, 3, and 5 each have two ties\n\n        # gt_pred_pairs_of_highest_quality[:, 0]代表是对应的gt index(不需要)\n        # pre_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]\n        pre_inds_to_update = gt_pred_pairs_of_highest_quality[1]\n        # 保留该anchor匹配gt最大iou的索引，即使iou低于设定的阈值\n        matches[pre_inds_to_update] = all_matches[pre_inds_to_update]\n\n\ndef smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):\n    \"\"\"\n    very similar to the smooth_l1_loss from pytorch, but with\n    the extra beta parameter\n    \"\"\"\n    n = torch.abs(input - target)\n    # cond = n < beta\n    cond = torch.lt(n, beta)\n    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)\n    if size_average:\n        return loss.mean()\n    return loss.sum()\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/image_list.py",
    "content": "from typing import List, Tuple\nfrom torch import Tensor\n\n\nclass ImageList(object):\n    \"\"\"\n    Structure that holds a list of images (of possibly\n    varying sizes) as a single tensor.\n    This works by padding the images to the same size,\n    and storing in a field the original sizes of each image\n    \"\"\"\n\n    def __init__(self, tensors, image_sizes):\n        # type: (Tensor, List[Tuple[int, int]]) -> None\n        \"\"\"\n        Arguments:\n            tensors (tensor) padding后的图像数据\n            image_sizes (list[tuple[int, int]])  padding前的图像尺寸\n        \"\"\"\n        self.tensors = tensors\n        self.image_sizes = image_sizes\n\n    def to(self, device):\n        # type: (Device) -> ImageList # noqa\n        cast_tensor = self.tensors.to(device)\n        return ImageList(cast_tensor, self.image_sizes)\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/losses.py",
    "content": "import torch\nimport torch.nn.functional as F\n\n\ndef sigmoid_focal_loss(\n    inputs: torch.Tensor,\n    targets: torch.Tensor,\n    alpha: float = 0.25,\n    gamma: float = 2,\n    reduction: str = \"none\",\n):\n    \"\"\"\n    Original implementation from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py .\n    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.\n\n    Args:\n        inputs: A float tensor of arbitrary shape.\n                The predictions for each example.\n        targets: A float tensor with the same shape as inputs. Stores the binary\n                classification label for each element in inputs\n                (0 for the negative class and 1 for the positive class).\n        alpha: (optional) Weighting factor in range (0,1) to balance\n                positive vs negative examples or -1 for ignore. Default = 0.25\n        gamma: Exponent of the modulating factor (1 - p_t) to\n               balance easy vs hard examples.\n        reduction: 'none' | 'mean' | 'sum'\n                 'none': No reduction will be applied to the output.\n                 'mean': The output will be averaged.\n                 'sum': The output will be summed.\n    Returns:\n        Loss tensor with the reduction option applied.\n    \"\"\"\n    p = torch.sigmoid(inputs)\n    ce_loss = F.binary_cross_entropy_with_logits(\n        inputs, targets, reduction=\"none\"\n    )\n    p_t = p * targets + (1 - p) * (1 - targets)\n    loss = ce_loss * ((1 - p_t) ** gamma)\n\n    if alpha >= 0:\n        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)\n        loss = alpha_t * loss\n\n    if reduction == \"mean\":\n        loss = loss.mean()\n    elif reduction == \"sum\":\n        loss = loss.sum()\n\n    return loss\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/retinanet.py",
    "content": "import math\nimport warnings\nfrom collections import OrderedDict\nfrom typing import Dict, List, Tuple, Optional, Union\n\nimport torch\nfrom torch import nn, Tensor\n\nfrom . import det_utils\nfrom .anchor_utils import AnchorsGenerator\nfrom . import boxes as box_ops\nfrom .losses import sigmoid_focal_loss\nfrom .transform import GeneralizedRCNNTransform\n\n\ndef _sum(x: List[Tensor]) -> Tensor:\n    res = x[0]\n    for i in x[1:]:\n        res = res + i\n    return res\n\n\nclass RetinaNetClassificationHead(nn.Module):\n    \"\"\"\n    A classification head for use in RetinaNet.\n\n    Args:\n        in_channels (int): number of channels of the input feature\n        num_anchors (int): number of anchors to be predicted\n        num_classes (int): number of classes to be predicted\n    \"\"\"\n\n    def __init__(self, in_channels, num_anchors, num_classes, prior_probability=0.01):\n        super(RetinaNetClassificationHead, self).__init__()\n\n        # class subnet是由四个3x3的卷积层(激活函数为ReLU) + 一个3x3的卷积层(分类器)\n        conv = []\n        for _ in range(4):\n            conv.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))\n            conv.append(nn.ReLU(inplace=True))\n        self.conv = nn.Sequential(*conv)\n\n        self.cls_logits = nn.Conv2d(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)\n\n        # initial weights\n        for layer in self.conv.children():\n            if isinstance(layer, nn.Conv2d):\n                torch.nn.init.normal_(layer.weight, std=0.01)\n                torch.nn.init.constant_(layer.bias, 0)\n\n        torch.nn.init.normal_(self.cls_logits.weight, std=0.01)\n        torch.nn.init.constant_(self.cls_logits.bias, -math.log((1 - prior_probability) / prior_probability))\n\n        self.num_classes = num_classes\n        self.num_anchors = num_anchors\n\n        self.BETWEEN_THRESHOLDS = det_utils.Matcher.BETWEEN_THRESHOLDS\n\n    def compute_loss(self,\n         
            targets: List[Dict[str, Tensor]],\n                     head_outputs: Dict[str, Tensor],\n                     matched_idxs: List[Tensor]) -> Tensor:\n        losses = []\n        cls_logits = head_outputs[\"cls_logits\"]\n        for targets_per_img, cls_logits_per_img, matched_idxs_per_img in zip(targets, cls_logits, matched_idxs):\n            # determine only the foreground\n            # 找出所有前景目标\n            foreground_idxs_per_img = torch.ge(matched_idxs_per_img, 0)  # ge: >=\n            num_foreground = foreground_idxs_per_img.sum()\n\n            # create the target classification\n            gt_classes_target = torch.zeros_like(cls_logits_per_img)\n            gt_classes_target[\n                foreground_idxs_per_img,\n                targets_per_img[\"labels\"][matched_idxs_per_img[foreground_idxs_per_img]]\n            ] = 1.0\n\n            # find indices for which anchors should be ignored\n            # 忽略iou在[0.4, 0.5)之间的anchors\n            valid_idxs_per_img = torch.ne(matched_idxs_per_img, self.BETWEEN_THRESHOLDS)  # ne: !=\n\n            # compute the classification loss\n            losses.append(sigmoid_focal_loss(\n                cls_logits_per_img[valid_idxs_per_img],\n                gt_classes_target[valid_idxs_per_img],\n                reduction=\"sum\"\n            ) / max(1, num_foreground))  # 注意这里除以的是正样本的个数\n\n        # len(targets): batch_size\n        return _sum(losses) / len(targets)\n\n    def forward(self, x: Tensor) -> Tensor:\n        all_cls_logits = []\n\n        # 遍历每个预测特征层\n        for features in x:\n            cls_logits = self.conv(features)\n            cls_logits = self.cls_logits(cls_logits)\n\n            # Permute classification output from (N, A * K, H, W) to (N, HWA, K).\n            N, _, H, W = cls_logits.shape\n            cls_logits = cls_logits.view(N, -1, self.num_classes, H, W)\n            # [N, A, K, H, W] -> [N, H, W, A, K]\n            cls_logits = cls_logits.permute(0, 3, 4, 1, 2)\n 
           # [N, H, W, A, K] -> [N, HWA, K]\n            cls_logits = cls_logits.reshape(N, -1, self.num_classes)\n\n            all_cls_logits.append(cls_logits)\n\n        return torch.cat(all_cls_logits, dim=1)\n\n\nclass RetinaNetRegressionHead(nn.Module):\n    \"\"\"\n    A regression head for use in RetinaNet.\n\n    Args:\n        in_channels (int): number of channels of the input feature\n        num_anchors (int): number of anchors to be predicted\n    \"\"\"\n\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n    }\n\n    def __init__(self, in_channels, num_anchors):\n        super(RetinaNetRegressionHead, self).__init__()\n\n        # box subnet是由四个3x3的卷积层(激活函数为ReLU) + 一个3x3的卷积层(边界框回归器)\n        conv = []\n        for _ in range(4):\n            conv.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))\n            conv.append(nn.ReLU(inplace=True))\n        self.conv = nn.Sequential(*conv)\n\n        self.bbox_reg = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)\n\n        # initial weights\n        for layer in self.conv.children():\n            if isinstance(layer, nn.Conv2d):\n                torch.nn.init.normal_(layer.weight, std=0.01)\n                torch.nn.init.zeros_(layer.bias)\n\n        self.bbox_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))\n\n    def compute_loss(self,\n                     targets: List[Dict[str, Tensor]],\n                     head_outputs: Dict[str, Tensor],\n                     anchors: List[Tensor],\n                     matched_idxs: List[Tensor]) -> Tensor:\n        losses = []\n\n        bbox_regression = head_outputs[\"bbox_regression\"]\n        for targets_per_img, bbox_regression_per_img, anchors_per_img, matched_idxs_per_img in \\\n                zip(targets, bbox_regression, anchors, matched_idxs):\n            # determine only the foreground indices, ignore the rest\n            foreground_idxs_per_img = 
torch.where(torch.ge(matched_idxs_per_img, 0))[0]  # ge: >=\n            num_foreground = foreground_idxs_per_img.numel()\n\n            # select only the foreground boxes\n            matched_gt_boxes_per_img = targets_per_img[\"boxes\"][matched_idxs_per_img[foreground_idxs_per_img]]\n            bbox_regression_per_img = bbox_regression_per_img[foreground_idxs_per_img, :]\n            anchors_per_img = anchors_per_img[foreground_idxs_per_img, :]\n\n            # compute the regression targets\n            targets_regression = self.bbox_coder.encode_single(matched_gt_boxes_per_img, anchors_per_img)\n\n            # compute the box regression loss\n            losses.append(torch.nn.functional.l1_loss(\n                bbox_regression_per_img,\n                targets_regression,\n                reduction=\"sum\"\n            ) / max(1, num_foreground))\n\n        return _sum(losses) / max(1, len(targets))\n\n    def forward(self, x: List[Tensor]) -> Tensor:\n        all_bbox_regression = []\n\n        # 遍历每个预测特征层\n        for features in x:\n            bbox_regression = self.conv(features)\n            bbox_regression = self.bbox_reg(bbox_regression)\n\n            # Permute bbox regression output from (N, 4 * A, H, W) to (N, HWA, 4).\n            N, _, H, W = bbox_regression.shape\n            # [N, 4 * A, H, W] -> [N, A, 4, H, W]\n            bbox_regression = bbox_regression.view(N, -1, 4, H, W)\n            # [N, A, 4, H, W] -> [N, H, W, A, 4]\n            bbox_regression = bbox_regression.permute(0, 3, 4, 1, 2)\n            # [N, H, W, A, 4] -> [N, HWA, 4]\n            bbox_regression = bbox_regression.reshape(N, -1, 4)\n\n            all_bbox_regression.append(bbox_regression)\n\n        return torch.cat(all_bbox_regression, dim=1)\n\n\nclass RetinaNetHead(nn.Module):\n    \"\"\"\n    A regression and classification head for use in RetinaNet.\n\n    Args:\n        in_channels (int): number of channels of the input feature\n        num_anchors (int): number 
of anchors to be predicted\n        num_classes (int): number of classes to be predicted\n    \"\"\"\n\n    def __init__(self, in_channels, num_anchors, num_classes):\n        super(RetinaNetHead, self).__init__()\n        self.classification_head = RetinaNetClassificationHead(in_channels, num_anchors, num_classes)\n        self.regression_head = RetinaNetRegressionHead(in_channels, num_anchors)\n\n    def compute_loss(self,\n                     targets: List[Dict[str, Tensor]],\n                     head_outputs: Dict[str, Tensor],\n                     anchors: List[Tensor],\n                     matched_idxs: List[Tensor]) -> Dict[str, Tensor]:\n        return {\n            \"classification\": self.classification_head.compute_loss(targets, head_outputs, matched_idxs),\n            \"bbox_regression\": self.regression_head.compute_loss(targets, head_outputs, anchors, matched_idxs)\n        }\n\n    def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:\n        return {\n            \"cls_logits\": self.classification_head(x),\n            \"bbox_regression\": self.regression_head(x)\n        }\n\n\nclass RetinaNet(nn.Module):\n    \"\"\"\n    Implements RetinaNet.\n\n    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each\n    image, and should be in 0-1 range. 
Different images can have different sizes.\n\n    The behavior of the model changes depending if it is in training or evaluation mode.\n\n    During training, the model expects both the input tensors, as well as a targets (list of dictionary),\n    containing:\n        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with\n          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.\n        - labels (Int64Tensor[N]): the class label for each ground-truth box\n\n    The model returns a Dict[Tensor] during training, containing the classification and regression\n    losses.\n\n    During inference, the model requires only the input tensors, and returns the post-processed\n    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as\n    follows:\n        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with\n          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.\n        - labels (Int64Tensor[N]): the predicted labels for each image\n        - scores (Tensor[N]): the scores for each prediction\n\n    Args:\n        backbone (nn.Module): the network used to compute the features for the model.\n            It should contain an out_channels attribute, which indicates the number of output\n            channels that each feature map has (and it should be the same for all feature maps).\n            The backbone should return a single Tensor or an OrderedDict[Tensor].\n        num_classes (int): number of output classes of the model (excluding the background).\n        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone\n        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone\n        image_mean (Tuple[float, float, float]): mean values used for input normalization.\n            They are generally the mean values of the dataset on which the backbone has been trained\n            
on\n        image_std (Tuple[float, float, float]): std values used for input normalization.\n            They are generally the std values of the dataset on which the backbone has been trained on\n        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        head (nn.Module): Module run on top of the feature pyramid.\n            Defaults to a module containing a classification and regression module.\n        score_thresh (float): Score threshold used for postprocessing the detections.\n        nms_thresh (float): NMS threshold used for postprocessing the detections.\n        detections_per_img (int): Number of best detections to keep after NMS.\n        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training.\n        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training.\n        topk_candidates (int): Number of best detections to keep before NMS.\n    \"\"\"\n\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n    }\n\n    def __init__(self, backbone, num_classes,\n                 # transform parameters\n                 min_size=800, max_size=1333,\n                 image_mean=None, image_std=None,\n                 # Anchor parameters\n                 anchor_generator=None, head=None,\n                 proposal_matcher=None,\n                 score_thresh=0.05,\n                 nms_thresh=0.5,\n                 detections_per_img=100,\n                 fg_iou_thresh=0.5, bg_iou_thresh=0.4,\n                 topk_candidates=1000):\n        super(RetinaNet, self).__init__()\n\n        if not hasattr(backbone, \"out_channels\"):\n            raise ValueError(\n                \"backbone should contain an attribute out_channels \"\n                \"specifying the 
number of output channels (assumed to be the \"\n                \"same for all the levels)\"\n            )\n\n        self.backbone = backbone\n\n        assert isinstance(anchor_generator, (AnchorsGenerator, type(None)))\n\n        if anchor_generator is None:\n            # 原论文中说在每个预测特征层上除了使用给定的尺度x外，还要额外添加x*2^(1/3)和x*2^(2/3)这两个尺度\n            # 五个预测特征层采用的原始尺度分别为32， 64， 128， 256， 512\n            # 注意尺度和面积的关系，面积=尺度^2\n            anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3)))\n                                 for x in [32, 64, 128, 256, 512])\n            # 对于每个预测特征层上anchors，都会使用三种比例\n            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n            anchor_generator = AnchorsGenerator(anchor_sizes, aspect_ratios)\n\n        self.anchor_generator = anchor_generator\n\n        if head is None:\n            head = RetinaNetHead(backbone.out_channels,   # in_channels\n                                 anchor_generator.num_anchors_per_location()[0],  # num_anchors\n                                 num_classes)  # num_classes\n        self.head = head\n\n        if proposal_matcher is None:\n            proposal_matcher = det_utils.Matcher(\n                fg_iou_thresh,\n                bg_iou_thresh,\n                allow_low_quality_matches=True\n            )\n        self.proposal_matcher = proposal_matcher\n\n        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))\n\n        if image_mean is None:\n            image_mean = [0.485, 0.456, 0.406]\n        if image_std is None:\n            image_std = [0.229, 0.224, 0.225]\n\n        self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)\n\n        self.score_thresh = score_thresh\n        self.nms_thresh = nms_thresh\n        self.detections_per_img = detections_per_img\n        self.topk_candidates = topk_candidates\n\n        # used only on torchscript mode\n        self._has_warned = False\n\n    @torch.jit.unused\n    
def eager_outputs(self, losses, detections):\n        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        if self.training:\n            return losses\n\n        return detections\n\n    def compute_loss(self, targets, head_outputs, anchors):\n        # type: (List[Dict[str, Tensor]], Dict[str, Tensor], List[Tensor]) -> Dict[str, Tensor]\n        matched_idxs = []\n        for anchors_per_img, targets_per_img in zip(anchors, targets):\n            if targets_per_img[\"boxes\"].numel() == 0:\n                matched_idxs.append(torch.full((anchors_per_img.size(0),), -1, dtype=torch.int64))\n                continue\n\n            match_quality_matrix = box_ops.box_iou(targets_per_img[\"boxes\"], anchors_per_img)\n            matched_idxs.append(self.proposal_matcher(match_quality_matrix))\n\n        return self.head.compute_loss(targets, head_outputs, anchors, matched_idxs)\n\n    def postprocess_detections(self, head_output, anchors, image_shapes):\n        # type: (Dict[str, List[Tensor]], List[List[Tensor]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]\n        class_logits = head_output[\"cls_logits\"]\n        box_regression = head_output[\"bbox_regression\"]\n\n        num_img = len(image_shapes)\n\n        detections: List[Dict[str, Tensor]] = []\n\n        for index in range(num_img):\n            box_regression_per_img = [br[index] for br in box_regression]\n            logits_per_img = [cl[index] for cl in class_logits]\n            anchors_per_img, image_shape = anchors[index], image_shapes[index]\n\n            img_boxes = []\n            img_scores = []\n            img_labels = []\n\n            for box_regression_per_level, logits_per_level, anchors_per_level in \\\n                    zip(box_regression_per_img, logits_per_img, anchors_per_img):\n                num_classes = logits_per_level.shape[-1]\n\n                # remove low scoring boxes\n                # 移除低概率的目标\n        
        scores_per_level = torch.sigmoid(logits_per_level).flatten()\n                keep_idxs = torch.gt(scores_per_level, self.score_thresh)  # gt: >\n                scores_per_level = scores_per_level[keep_idxs]\n                topk_idxs = torch.where(keep_idxs)[0]\n\n                # keep only topk scoring predictions\n                # 在每个level上只取前topk个目标\n                num_topk = min(self.topk_candidates, topk_idxs.size(0))\n                scores_per_level, idxs = scores_per_level.topk(num_topk)\n                topk_idxs = topk_idxs[idxs]\n\n                anchor_idxs = topk_idxs // num_classes\n                labels_per_level = topk_idxs % num_classes\n\n                boxes_per_level = self.box_coder.decode_single(box_regression_per_level[anchor_idxs],\n                                                               anchors_per_level[anchor_idxs])\n                boxes_per_level = box_ops.clip_boxes_to_image(boxes_per_level, image_shape)\n\n                img_boxes.append(boxes_per_level)\n                img_scores.append(scores_per_level)\n                img_labels.append(labels_per_level)\n\n            img_boxes = torch.cat(img_boxes, dim=0)\n            img_scores = torch.cat(img_scores, dim=0)\n            img_labels = torch.cat(img_labels, dim=0)\n\n            # non-maximum suppression\n            keep = box_ops.batched_nms(img_boxes, img_scores, img_labels, self.nms_thresh)\n            keep = keep[:self.detections_per_img]\n\n            detections.append({\n                \"boxes\": img_boxes[keep],\n                \"scores\": img_scores[keep],\n                \"labels\": img_labels[keep]\n            })\n\n        return detections\n\n    def forward(self, images, targets=None):\n        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        \"\"\"\n        Args:\n            images (list[Tensor]): images to be processed\n            targets (list[Dict[Tensor]]): 
ground-truth boxes present in the image (optional)\n\n        Returns:\n            result (list[BoxList] or dict[Tensor]): the output from the model.\n                During training, it returns a dict[Tensor] which contains the losses.\n                During testing, it returns list[BoxList] contains additional fields\n                like `scores`, `labels` and `mask` (for Mask R-CNN models).\n\n        \"\"\"\n        if self.training and targets is None:\n            raise ValueError(\"In training mode, targets should be passed\")\n\n        if self.training:\n            assert targets is not None\n            # check targets info\n            for target in targets:\n                boxes = target[\"boxes\"]\n                if isinstance(boxes, torch.Tensor):\n                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:\n                        raise ValueError(\"Expected target boxes to be a tensor\"\n                                         \"of shape [N, 4], got {:}.\".format(boxes.shape))\n                else:\n                    raise ValueError(\"Expected target boxes to be of type \"\n                                     \"Tensor, got {:}.\".format(type(boxes)))\n\n        # get the original images sizes\n        original_img_sizes: List[Tuple[int, int]] = []\n        for img in images:\n            val = img.shape[-2:]\n            assert len(val) == 2\n            original_img_sizes.append((val[0], val[1]))  # h, w\n\n        # transform the input\n        images, targets = self.transform(images, targets)\n\n        # Check for degenerate boxes\n        # TODO: Move this to a function\n        if targets is not None:\n            for target_idx, target in enumerate(targets):\n                boxes = target[\"boxes\"]\n                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]\n                if degenerate_boxes.any():\n                    # print the first degenerate box\n                    bb_idx = 
torch.where(degenerate_boxes.any(dim=1))[0][0]\n                    degen_bb: List[float] = boxes[bb_idx].tolist()\n                    raise ValueError(\"All bounding boxes should have positive height and width.\"\n                                     \" Found invalid box {} for target at index {}.\"\n                                     .format(degen_bb, target_idx))\n\n        # get the features from the backbone\n        features = self.backbone(images.tensors)\n        if isinstance(features, torch.Tensor):\n            features = OrderedDict([(\"0\", features)])\n\n        features = list(features.values())\n\n        # compute the retinanet heads outputs using the features\n        head_outputs = self.head(features)\n\n        # create the set of anchors\n        anchors = self.anchor_generator(images, features)\n\n        losses = {}\n        detections: List[Dict[str, Tensor]] = []\n        if self.training:\n            assert targets is not None\n            losses = self.compute_loss(targets, head_outputs, anchors)\n        else:\n            # recover level sizes\n            num_anchors_per_level = [x.size(2) * x.size(3) for x in features]\n            HW = 0\n            for v in num_anchors_per_level:\n                HW += v\n            HWA = head_outputs[\"cls_logits\"].size(1)\n            A = HWA // HW\n            num_anchors_per_level = [hw * A for hw in num_anchors_per_level]\n\n            # split outputs per level\n            split_head_outputs: Dict[str, List[Tensor]] = {}\n            for k in head_outputs:\n                split_head_outputs[k] = list(head_outputs[k].split(num_anchors_per_level, dim=1))\n            split_anchors = [list(a.split(num_anchors_per_level)) for a in anchors]\n\n            # compute the detections\n            detections = self.postprocess_detections(split_head_outputs, split_anchors, images.image_sizes)\n            detections = self.transform.postprocess(detections, images.image_sizes, 
original_img_sizes)\n\n        if torch.jit.is_scripting():\n            if not self._has_warned:\n                warnings.warn(\"RetinaNet always returns a (Losses, Detections) tuple in scripting\")\n                self._has_warned = True\n            return losses, detections\n        return self.eager_outputs(losses, detections)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/network_files/transform.py",
    "content": "import math\nfrom typing import List, Tuple, Dict, Optional\n\nimport torch\nfrom torch import nn, Tensor\nimport torchvision\n\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _resize_image_onnx(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    from torch.onnx import operators\n    im_shape = operators.shape_as_tensor(image)[-2:]\n    min_size = torch.min(im_shape).to(dtype=torch.float32)\n    max_size = torch.max(im_shape).to(dtype=torch.float32)\n    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)\n\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\ndef _resize_image(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    im_shape = torch.tensor(image.shape[-2:])\n    min_size = float(torch.min(im_shape))    # 获取高宽中的最小值\n    max_size = float(torch.max(im_shape))    # 获取高宽中的最大值\n    scale_factor = self_min_size / min_size  # 根据指定最小边长和图片最小边长计算缩放比例\n\n    # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长\n    if max_size * scale_factor > self_max_size:\n        scale_factor = self_max_size / max_size  # 将缩放比例设为指定最大边长和图片最大边长之比\n\n    # interpolate利用插值的方法缩放图片\n    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]\n    # bilinear只支持4D Tensor\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\nclass GeneralizedRCNNTransform(nn.Module):\n    \"\"\"\n    Performs input / target transformation before feeding the data to a GeneralizedRCNN\n    model.\n\n    The transformations it perform are:\n        - input normalization (mean subtraction and std division)\n        - input / target resizing to match min_size / max_size\n\n    It returns a ImageList for the inputs, 
and a List[Dict[Tensor]] for the targets\n    \"\"\"\n\n    def __init__(self, min_size, max_size, image_mean, image_std):\n        super(GeneralizedRCNNTransform, self).__init__()\n        if not isinstance(min_size, (list, tuple)):\n            min_size = (min_size,)\n        self.min_size = min_size      # 指定图像的最小边长范围\n        self.max_size = max_size      # 指定图像的最大边长范围\n        self.image_mean = image_mean  # 指定图像在标准化处理中的均值\n        self.image_std = image_std    # 指定图像在标准化处理中的方差\n\n    def normalize(self, image):\n        \"\"\"标准化处理\"\"\"\n        dtype, device = image.dtype, image.device\n        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)\n        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)\n        # [:, None, None]: shape [3] -> [3, 1, 1]\n        return (image - mean[:, None, None]) / std[:, None, None]\n\n    def torch_choice(self, k):\n        # type: (List[int]) -> int\n        \"\"\"\n        Implements `random.choice` via torch ops so it can be compiled with\n        TorchScript. 
Remove if https://github.com/pytorch/pytorch/issues/25803\n        is fixed.\n        \"\"\"\n        index = int(torch.empty(1).uniform_(0., float(len(k))).item())\n        return k[index]\n\n    def resize(self, image, target):\n        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]\n        \"\"\"\n        将图片缩放到指定的大小范围内，并对应缩放bboxes信息\n        Args:\n            image: 输入的图片\n            target: 输入图片的相关信息（包括bboxes信息）\n\n        Returns:\n            image: 缩放后的图片\n            target: 缩放bboxes后的图片相关信息\n        \"\"\"\n        # image shape is [channel, height, width]\n        h, w = image.shape[-2:]\n\n        if self.training:\n            size = float(self.torch_choice(self.min_size))  # 指定输入图片的最小边长,注意是self.min_size不是min_size\n        else:\n            # FIXME assume for now that testing uses the largest scale\n            size = float(self.min_size[-1])    # 指定输入图片的最小边长,注意是self.min_size不是min_size\n\n        if torchvision._is_tracing():\n            image = _resize_image_onnx(image, size, float(self.max_size))\n        else:\n            image = _resize_image(image, size, float(self.max_size))\n\n        if target is None:\n            return image, target\n\n        bbox = target[\"boxes\"]\n        # 根据图像的缩放比例来缩放bbox\n        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])\n        target[\"boxes\"] = bbox\n\n        return image, target\n\n    # _onnx_batch_images() is an implementation of\n    # batch_images() that is supported by ONNX tracing.\n    @torch.jit.unused\n    def _onnx_batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        max_size = []\n        for i in range(images[0].dim()):\n            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)\n            max_size.append(max_size_i)\n        stride = size_divisible\n        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * 
stride).to(torch.int64)\n        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)\n        max_size = tuple(max_size)\n\n        # work around for\n        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n        # which is not yet supported in onnx\n        padded_imgs = []\n        for img in images:\n            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]\n            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])\n            padded_imgs.append(padded_img)\n\n        return torch.stack(padded_imgs)\n\n    def max_by_axis(self, the_list):\n        # type: (List[List[int]]) -> List[int]\n        maxes = the_list[0]\n        for sublist in the_list[1:]:\n            for index, item in enumerate(sublist):\n                maxes[index] = max(maxes[index], item)\n        return maxes\n\n    def batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        \"\"\"\n        将一批图像打包成一个batch返回（注意batch中每个tensor的shape是相同的）\n        Args:\n            images: 输入的一批图片\n            size_divisible: 将图像高和宽调整到该数的整数倍\n\n        Returns:\n            batched_imgs: 打包成一个batch后的tensor数据\n        \"\"\"\n\n        if torchvision._is_tracing():\n            # batch_images() does not export well to ONNX\n            # call _onnx_batch_images() instead\n            return self._onnx_batch_images(images, size_divisible)\n\n        # 分别计算一个batch中所有图片中的最大channel, height, width\n        max_size = self.max_by_axis([list(img.shape) for img in images])\n\n        stride = float(size_divisible)\n        # max_size = list(max_size)\n        # 将height向上调整到stride的整数倍\n        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)\n        # 将width向上调整到stride的整数倍\n        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)\n\n        # [batch, channel, height, width]\n        batch_shape = 
[len(images)] + max_size\n\n        # 创建shape为batch_shape且值全部为0的tensor\n        batched_imgs = images[0].new_full(batch_shape, 0)\n        for img, pad_img in zip(images, batched_imgs):\n            # 将输入images中的每张图片复制到新的batched_imgs的每张图片中，对齐左上角，保证bboxes的坐标不变\n            # 这样保证输入到网络中一个batch的每张图片的shape相同\n            # copy_: Copies the elements from src into self tensor and returns self\n            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n\n        return batched_imgs\n\n    def postprocess(self,\n                    result,                # type: List[Dict[str, Tensor]]\n                    image_shapes,          # type: List[Tuple[int, int]]\n                    original_image_sizes   # type: List[Tuple[int, int]]\n                    ):\n        # type: (...) -> List[Dict[str, Tensor]]\n        \"\"\"\n        对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        Args:\n            result: list(dict), 网络的预测结果, len(result) == batch_size\n            image_shapes: list(torch.Size), 图像预处理缩放后的尺寸, len(image_shapes) == batch_size\n            original_image_sizes: list(torch.Size), 图像的原始尺寸, len(original_image_sizes) == batch_size\n\n        Returns:\n\n        \"\"\"\n        if self.training:\n            return result\n\n        # 遍历每张图片的预测信息，将boxes信息还原回原尺度\n        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):\n            boxes = pred[\"boxes\"]\n            boxes = resize_boxes(boxes, im_s, o_im_s)  # 将bboxes缩放回原图像尺度上\n            result[i][\"boxes\"] = boxes\n        return result\n\n    def __repr__(self):\n        \"\"\"自定义输出实例化对象的信息，可通过print打印实例信息\"\"\"\n        format_string = self.__class__.__name__ + '('\n        _indent = '\\n    '\n        format_string += \"{0}Normalize(mean={1}, std={2})\".format(_indent, self.image_mean, self.image_std)\n        format_string += \"{0}Resize(min_size={1}, max_size={2}, mode='bilinear')\".format(_indent, self.min_size,\n                                       
                                                  self.max_size)\n        format_string += '\\n)'\n        return format_string\n\n    def forward(self,\n                images,       # type: List[Tensor]\n                targets=None  # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]\n        images = [img for img in images]\n        for i in range(len(images)):\n            image = images[i]\n            target_index = targets[i] if targets is not None else None\n\n            if image.dim() != 3:\n                raise ValueError(\"images is expected to be a list of 3d tensors \"\n                                 \"of shape [C, H, W], got {}\".format(image.shape))\n            image = self.normalize(image)                # 对图像进行标准化处理\n            image, target_index = self.resize(image, target_index)   # 对图像和对应的bboxes缩放到指定范围\n            images[i] = image\n            if targets is not None and target_index is not None:\n                targets[i] = target_index\n\n        # 记录resize后的图像尺寸\n        image_sizes = [img.shape[-2:] for img in images]\n        images = self.batch_images(images)  # 将images打包成一个batch\n        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])\n\n        for image_size in image_sizes:\n            assert len(image_size) == 2\n            image_sizes_list.append((image_size[0], image_size[1]))\n\n        image_list = ImageList(images, image_sizes_list)\n        return image_list, targets\n\n\ndef resize_boxes(boxes, original_size, new_size):\n    # type: (Tensor, List[int], List[int]) -> Tensor\n    \"\"\"\n    将boxes参数根据图像的缩放情况进行相应缩放\n\n    Arguments:\n        original_size: 图像缩放前的尺寸\n        new_size: 图像缩放后的尺寸\n    \"\"\"\n    ratios = [\n        torch.tensor(s, dtype=torch.float32, device=boxes.device) /\n        torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)\n        for s, s_orig in zip(new_size, original_size)\n   
 ]\n    ratios_height, ratios_width = ratios\n    # Removes a tensor dimension, boxes [minibatch, 4]\n    # Returns a tuple of all slices along a given dimension, already without it.\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    xmin = xmin * ratios_width\n    xmax = xmax * ratios_width\n    ymin = ymin * ratios_height\n    ymax = ymax * ratios_height\n    return torch.stack((xmin, ymin, xmax, ymax), dim=1)\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 0,\n    \"bicycle\": 1,\n    \"bird\": 2,\n    \"boat\": 3,\n    \"bottle\": 4,\n    \"bus\": 5,\n    \"car\": 6,\n    \"cat\": 7,\n    \"chair\": 8,\n    \"cow\": 9,\n    \"diningtable\": 10,\n    \"dog\": 11,\n    \"horse\": 12,\n    \"motorbike\": 13,\n    \"person\": 14,\n    \"pottedplant\": 15,\n    \"sheep\": 16,\n    \"sofa\": 17,\n    \"train\": 18,\n    \"tvmonitor\": 19\n}"
  },
  {
    "path": "pytorch_object_detection/retinaNet/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"step\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nfrom torchvision import transforms\nfrom network_files import RetinaNet\nfrom backbone import resnet50_fpn_backbone, LastLevelP6P7\nfrom draw_box_utils import draw_objs\n\n\ndef create_model(num_classes):\n    # resNet50+fpn+retinanet\n    # 注意，这里的norm_layer要和训练脚本中保持一致\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,\n                                     returned_layers=[2, 3, 4],\n                                     extra_blocks=LastLevelP6P7(256, 256))\n    model = RetinaNet(backbone, num_classes)\n\n    return model\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    # 注意：不包含背景\n    model = create_model(num_classes=20)\n\n    # load train weights\n    weights_path = \"./save_weights/model.pth\"\n    assert os.path.exists(weights_path), \"{} file dose not exist.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # read class_indict\n    label_json_path = './pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {str(v): str(k) for k, v in class_dict.items()}\n\n    # load image\n    original_img = Image.open(\"./test.jpg\")\n\n    # from pil image to tensor, do not normalize image\n    data_transform = transforms.Compose([transforms.ToTensor()])\n    img = data_transform(original_img)\n    # 
expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        predictions = model(img.to(device))[0]\n        t_end = time_synchronized()\n        print(\"inference+NMS time: {}\".format(t_end - t_start))\n\n        predict_boxes = predictions[\"boxes\"].to(\"cpu\").numpy()\n        predict_classes = predictions[\"labels\"].to(\"cpu\").numpy()\n        predict_scores = predictions[\"scores\"].to(\"cpu\").numpy()\n\n        if len(predict_boxes) == 0:\n            print(\"没有检测到任何目标!\")\n\n        plot_img = draw_objs(original_img,\n                             predict_boxes,\n                             predict_classes,\n                             predict_scores,\n                             category_index=category_index,\n                             box_thresh=0.5,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/requirements.txt",
    "content": "lxml\nmatplotlib\nnumpy\ntqdm\ntorch==1.7.1\ntorchvision==0.8.2\npycocotools\nPillow\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/results20210421-142632.txt",
    "content": "epoch:0 0.4012  0.6088  0.4334  0.1691  0.3113  0.4498  0.4265  0.6233  0.6478  0.3362  0.5541  0.6977  1.0681  0.01\nepoch:1 0.5028  0.7295  0.5441  0.2219  0.3913  0.5552  0.4624  0.6649  0.6875  0.4039  0.5928  0.7346  0.5422  0.01\nepoch:2 0.5311  0.7614  0.5784  0.2439  0.4189  0.5852  0.4733  0.6774  0.698  0.417  0.6105  0.7441  0.4456  0.01\nepoch:3 0.5439  0.7762  0.595  0.2412  0.4292  0.5996  0.4773  0.6835  0.7021  0.4137  0.6074  0.7494  0.3872  0.01\nepoch:4 0.5404  0.7739  0.5949  0.2457  0.426  0.5968  0.4723  0.6818  0.7007  0.4363  0.6047  0.7479  0.347  0.01\nepoch:5 0.5513  0.7867  0.6021  0.2415  0.4265  0.6087  0.4811  0.685  0.7041  0.4073  0.6088  0.7526  0.3166  0.01\nepoch:6 0.5508  0.7909  0.6014  0.2327  0.4211  0.6116  0.478  0.6803  0.699  0.4081  0.5994  0.7485  0.2884  0.01\nepoch:7 0.5617  0.7972  0.6142  0.2431  0.427  0.6223  0.4848  0.6862  0.7049  0.4184  0.6018  0.7551  0.2546  0.001\nepoch:8 0.561  0.7986  0.6117  0.2342  0.4268  0.6223  0.4842  0.6855  0.705  0.4153  0.6051  0.7551  0.2462  0.001\nepoch:9 0.563  0.7983  0.6153  0.2359  0.4336  0.6237  0.4849  0.6884  0.7068  0.4103  0.6063  0.7574  0.2428  0.001\nepoch:10 0.563  0.7991  0.6167  0.2363  0.4334  0.6234  0.4854  0.6879  0.7062  0.4152  0.6063  0.7558  0.2391  0.001\nepoch:11 0.5637  0.7984  0.6145  0.2341  0.4345  0.6241  0.4842  0.6894  0.7083  0.4136  0.6074  0.7581  0.2355  0.001\nepoch:12 0.5624  0.7969  0.6155  0.2373  0.4292  0.623  0.4853  0.6866  0.7055  0.4136  0.6026  0.756  0.2323  0.0001\nepoch:13 0.5632  0.7985  0.6155  0.2358  0.4342  0.6243  0.4858  0.6878  0.7065  0.4206  0.6039  0.7576  0.2307  0.0001\nepoch:14 0.562  0.7977  0.6155  0.2309  0.4291  0.6234  0.4849  0.6869  0.7051  0.4198  0.6023  0.7558  0.2305  0.0001\nepoch:15 0.5631  0.7984  0.6155  0.2324  0.4326  0.6238  0.4849  0.6876  0.706  0.4151  0.6039  0.7565  0.2313  0.0001\nepoch:16 0.5632  0.7992  0.6164  0.2349  0.429  0.6245  0.4859  0.6871  0.7063  0.4186  0.604 
 0.7569  0.2302  0.0001\nepoch:17 0.5637  0.7994  0.6164  0.2325  0.4312  0.6245  0.4854  0.6873  0.706  0.4109  0.6023  0.7567  0.2312  0.0001\nepoch:18 0.5626  0.7984  0.6132  0.2333  0.431  0.6238  0.4854  0.6873  0.7056  0.4158  0.6025  0.7564  0.2298  0.0001\nepoch:19 0.5613  0.7981  0.612  0.2365  0.4278  0.622  0.4855  0.6867  0.7047  0.4112  0.6  0.7554  0.2305  0.0001\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom backbone import resnet50_fpn_backbone, LastLevelP6P7\nfrom network_files import RetinaNet\nfrom my_dataset import VOCDataSet\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\n\n\ndef create_model(num_classes):\n    # 创建retinanet_res50_fpn模型\n    # skip P2 because it generates too many anchors (according to their paper)\n    # 注意，这里的backbone默认使用的是FrozenBatchNorm2d，即不会去更新bn参数\n    # 目的是为了防止batch_size太小导致效果更差(如果显存很小，建议使用默认的FrozenBatchNorm2d)\n    # 如果GPU显存很大可以设置比较大的batch_size就可以将norm_layer设置为普通的BatchNorm2d\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,\n                                     returned_layers=[2, 3, 4],\n                                     extra_blocks=LastLevelP6P7(256, 256),\n                                     trainable_layers=3)\n    model = RetinaNet(backbone, num_classes)\n\n    # 载入预训练权重\n    # https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth\n    weights_dict = torch.load(\"./backbone/retinanet_resnet50_fpn.pth\", map_location='cpu')\n    # 删除分类器部分的权重，因为自己的数据集类别与预训练数据集类别(91)不一定致，如果载入会出现冲突\n    del_keys = [\"head.classification_head.cls_logits.weight\", \"head.classification_head.cls_logits.bias\"]\n    for k in del_keys:\n        del weights_dict[k]\n    print(model.load_state_dict(weights_dict, strict=False))\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root 
= args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                            
            collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_data_loader = torch.utils.data.DataLoader(val_dataset,\n                                                  batch_size=1,\n                                                  shuffle=False,\n                                                  pin_memory=True,\n                                                  num_workers=nw,\n                                                  collate_fn=val_dataset.collate_fn)\n\n    # create model\n    # 注意：不包含背景\n    model = create_model(num_classes=args.num_classes)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=0.005,\n                                momentum=0.9, weight_decay=0.0005)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=3,\n                                                   gamma=0.33)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, 
printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device, epoch, print_freq=50,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal map\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/resNetFpn-model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', 
default=20, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=15, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 训练的batch size\n    parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_multi_GPU.py",
    "content": "import os\nimport time\nimport datetime\n\nimport torch\n\nimport transforms\nfrom backbone import resnet50_fpn_backbone, LastLevelP6P7\nfrom network_files import RetinaNet\nfrom my_dataset import VOCDataSet\nfrom train_utils import train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir\n\n\ndef create_model(num_classes):\n    # 创建retinanet_res50_fpn模型\n    # skip P2 because it generates too many anchors (according to their paper)\n    # 注意，这里的backbone默认使用的是FrozenBatchNorm2d，即不会去更新bn参数\n    # 目的是为了防止batch_size太小导致效果更差(如果显存很小，建议使用默认的FrozenBatchNorm2d)\n    # 如果GPU显存很大可以设置比较大的batch_size就可以将norm_layer设置为普通的BatchNorm2d\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,\n                                     returned_layers=[2, 3, 4],\n                                     extra_blocks=LastLevelP6P7(256, 256),\n                                     trainable_layers=3)\n    model = RetinaNet(backbone, num_classes)\n\n    # 载入预训练权重\n    # https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth\n    weights_dict = torch.load(\"./backbone/retinanet_resnet50_fpn.pth\", map_location='cpu')\n    # 删除分类器部分的权重，因为自己的数据集类别与预训练数据集类别(91)不一定致，如果载入会出现冲突\n    del_keys = [\"head.classification_head.cls_logits.weight\", \"head.classification_head.cls_logits.bias\"]\n    for k in del_keys:\n        del weights_dict[k]\n    print(model.load_state_dict(weights_dict, strict=False))\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # Data loading code\n    print(\"Loading data\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     
transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], \"train.txt\")\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    if args.aspect_ratio_group_factor >= 0:\n        # 统计所有图像比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n    else:\n        train_batch_sampler = torch.utils.data.BatchSampler(\n            train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    data_loader_test = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model\n    # 注意：不包含背景\n    model = create_model(num_classes=args.num_classes)\n    
model.to(device)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(\n        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        utils.evaluate(model, data_loader_test, device=device)\n        return\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,\n                                              device, epoch, args.print_freq,\n                             
                 warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        coco_info = utils.evaluate(model, data_loader_test, device=device)\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_files = {\n                'model': model_without_ddp.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': lr_scheduler.state_dict(),\n                'args': args,\n                'epoch': epoch}\n            if args.amp:\n                save_files[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_files,\n                           os.path.join(args.output_dir, f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 
训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 学习率，这个需要根据gpu的数量以及batch_size进行设置0.02 / 8 * num_GPU\n    parser.add_argument('--lr', default=0.02, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.StepLR的参数\n    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a 
factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/__init__.py",
    "content": "from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/coco_eval.py",
    "content": "import json\nfrom collections import defaultdict\n\nimport numpy as np\nimport copy\nimport torch\nimport torch._six\nfrom pycocotools.cocoeval import COCOeval\nfrom pycocotools.coco import COCO\nimport pycocotools.mask as mask_util\n\nfrom .distributed_utils import all_gather\n\n\nclass CocoEvaluator(object):\n    def __init__(self, coco_gt, iou_types):\n        assert isinstance(iou_types, (list, tuple))\n        coco_gt = copy.deepcopy(coco_gt)\n        self.coco_gt = coco_gt\n\n        self.iou_types = iou_types\n        self.coco_eval = {}\n        for iou_type in iou_types:\n            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)\n\n        self.img_ids = []\n        self.eval_imgs = {k: [] for k in iou_types}\n\n    def update(self, predictions):\n        img_ids = list(np.unique(list(predictions.keys())))\n        self.img_ids.extend(img_ids)\n\n        for iou_type in self.iou_types:\n            results = self.prepare(predictions, iou_type)\n            coco_dt = loadRes(self.coco_gt, results) if results else COCO()\n            coco_eval = self.coco_eval[iou_type]\n\n            coco_eval.cocoDt = coco_dt\n            coco_eval.params.imgIds = list(img_ids)\n            img_ids, eval_imgs = evaluate(coco_eval)\n\n            self.eval_imgs[iou_type].append(eval_imgs)\n\n    def synchronize_between_processes(self):\n        for iou_type in self.iou_types:\n            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)\n            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])\n\n    def accumulate(self):\n        for coco_eval in self.coco_eval.values():\n            coco_eval.accumulate()\n\n    def summarize(self):\n        for iou_type, coco_eval in self.coco_eval.items():\n            print(\"IoU metric: {}\".format(iou_type))\n            coco_eval.summarize()\n\n    def prepare(self, predictions, iou_type):\n        if iou_type == \"bbox\":\n           
 return self.prepare_for_coco_detection(predictions)\n        elif iou_type == \"segm\":\n            return self.prepare_for_coco_segmentation(predictions)\n        elif iou_type == \"keypoints\":\n            return self.prepare_for_coco_keypoint(predictions)\n        else:\n            raise ValueError(\"Unknown iou type {}\".format(iou_type))\n\n    def prepare_for_coco_detection(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"bbox\": box,\n                        \"score\": scores[k],\n                    }\n                    for k, box in enumerate(boxes)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_segmentation(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            scores = prediction[\"scores\"]\n            labels = prediction[\"labels\"]\n            masks = prediction[\"masks\"]\n\n            masks = masks > 0.5\n\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            rles = [\n                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                for mask in masks\n            ]\n            for rle in rles:\n                rle[\"counts\"] = rle[\"counts\"].decode(\"utf-8\")\n\n            coco_results.extend(\n                [\n  
                  {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"segmentation\": rle,\n                        \"score\": scores[k],\n                    }\n                    for k, rle in enumerate(rles)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_keypoint(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n            keypoints = prediction[\"keypoints\"]\n            keypoints = keypoints.flatten(start_dim=1).tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        'keypoints': keypoint,\n                        \"score\": scores[k],\n                    }\n                    for k, keypoint in enumerate(keypoints)\n                ]\n            )\n        return coco_results\n\n\ndef convert_to_xywh(boxes):\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)\n\n\ndef merge(img_ids, eval_imgs):\n    all_img_ids = all_gather(img_ids)\n    all_eval_imgs = all_gather(eval_imgs)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_imgs = []\n    for p in all_eval_imgs:\n        merged_eval_imgs.append(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)\n\n    # keep only unique (and in sorted order) images\n    merged_img_ids, idx = np.unique(merged_img_ids, 
return_index=True)\n    merged_eval_imgs = merged_eval_imgs[..., idx]\n\n    return merged_img_ids, merged_eval_imgs\n\n\ndef create_common_coco_eval(coco_eval, img_ids, eval_imgs):\n    img_ids, eval_imgs = merge(img_ids, eval_imgs)\n    img_ids = list(img_ids)\n    eval_imgs = list(eval_imgs.flatten())\n\n    coco_eval.evalImgs = eval_imgs\n    coco_eval.params.imgIds = img_ids\n    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)\n\n\n#################################################################\n# From pycocotools, just removed the prints and fixed\n# a Python3 bug about unicode not defined\n#################################################################\n\n# Ideally, pycocotools wouldn't have hard-coded prints\n# so that we could avoid copy-pasting those two functions\n\ndef createIndex(self):\n    # create index\n    # print('creating index...')\n    anns, cats, imgs = {}, {}, {}\n    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)\n    if 'annotations' in self.dataset:\n        for ann in self.dataset['annotations']:\n            imgToAnns[ann['image_id']].append(ann)\n            anns[ann['id']] = ann\n\n    if 'images' in self.dataset:\n        for img in self.dataset['images']:\n            imgs[img['id']] = img\n\n    if 'categories' in self.dataset:\n        for cat in self.dataset['categories']:\n            cats[cat['id']] = cat\n\n    if 'annotations' in self.dataset and 'categories' in self.dataset:\n        for ann in self.dataset['annotations']:\n            catToImgs[ann['category_id']].append(ann['image_id'])\n\n    # print('index created!')\n\n    # create class members\n    self.anns = anns\n    self.imgToAnns = imgToAnns\n    self.catToImgs = catToImgs\n    self.imgs = imgs\n    self.cats = cats\n\n\nmaskUtils = mask_util\n\n\ndef loadRes(self, resFile):\n    \"\"\"\n    Load result file and return a result api object.\n    :param   resFile (str)     : file name of result file\n    :return: res (obj)         : result 
api object\n    \"\"\"\n    res = COCO()\n    res.dataset['images'] = [img for img in self.dataset['images']]\n\n    # print('Loading and preparing results...')\n    # tic = time.time()\n    if isinstance(resFile, torch._six.string_classes):\n        anns = json.load(open(resFile))\n    elif type(resFile) == np.ndarray:\n        anns = self.loadNumpyAnnotations(resFile)\n    else:\n        anns = resFile\n    assert type(anns) == list, 'results in not an array of objects'\n    annsImgIds = [ann['image_id'] for ann in anns]\n    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \\\n        'Results do not correspond to current coco set'\n    if 'caption' in anns[0]:\n        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])\n        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]\n        for id, ann in enumerate(anns):\n            ann['id'] = id + 1\n    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            bb = ann['bbox']\n            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]\n            if 'segmentation' not in ann:\n                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]\n            ann['area'] = bb[2] * bb[3]\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'segmentation' in anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            # now only support compressed RLE format as segmentation results\n            ann['area'] = maskUtils.area(ann['segmentation'])\n            if 'bbox' not in ann:\n                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'keypoints' in anns[0]:\n        
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            s = ann['keypoints']\n            x = s[0::3]\n            y = s[1::3]\n            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)\n            ann['area'] = (x2 - x1) * (y2 - y1)\n            ann['id'] = id + 1\n            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    # print('DONE (t={:0.2f}s)'.format(time.time()- tic))\n\n    res.dataset['annotations'] = anns\n    createIndex(res)\n    return res\n\n\ndef evaluate(self):\n    '''\n    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs\n    :return: None\n    '''\n    # tic = time.time()\n    # print('Running per image evaluation...')\n    p = self.params\n    # add backward compatibility if useSegm is specified in params\n    if p.useSegm is not None:\n        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'\n        print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType))\n    # print('Evaluate annotation type *{}*'.format(p.iouType))\n    p.imgIds = list(np.unique(p.imgIds))\n    if p.useCats:\n        p.catIds = list(np.unique(p.catIds))\n    p.maxDets = sorted(p.maxDets)\n    self.params = p\n\n    self._prepare()\n    # loop through images, area range, max detection number\n    catIds = p.catIds if p.useCats else [-1]\n\n    if p.iouType == 'segm' or p.iouType == 'bbox':\n        computeIoU = self.computeIoU\n    elif p.iouType == 'keypoints':\n        computeIoU = self.computeOks\n    self.ious = {\n        (imgId, catId): computeIoU(imgId, catId)\n        for imgId in p.imgIds\n        for catId in catIds}\n\n    evaluateImg = self.evaluateImg\n    maxDet = p.maxDets[-1]\n    evalImgs = [\n        evaluateImg(imgId, catId, areaRng, maxDet)\n        for catId in catIds\n        for areaRng in p.areaRng\n        for imgId in p.imgIds\n    ]\n    # this is NOT in the pycocotools code, but could be done outside\n    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))\n    self._paramsEval = copy.deepcopy(self.params)\n    # toc = time.time()\n    # print('DONE (t={:0.2f}s).'.format(toc-tic))\n    return p.imgIds, evalImgs\n\n#################################################################\n# end of straight copy from pycocotools, just removing the prints\n#################################################################\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/coco_utils.py",
    "content": "import torch\nimport torchvision\nimport torch.utils.data\nfrom pycocotools.coco import COCO\n\n\ndef convert_to_coco_api(ds):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0\n    ann_id = 1\n    dataset = {'images': [], 'categories': [], 'annotations': []}\n    categories = set()\n    for img_idx in range(len(ds)):\n        # find better way to get target\n        hw, targets = ds.coco_index(img_idx)\n        image_id = targets[\"image_id\"].item()\n        img_dict = {}\n        img_dict['id'] = image_id\n        img_dict['height'] = hw[0]\n        img_dict['width'] = hw[1]\n        dataset['images'].append(img_dict)\n        bboxes = targets[\"boxes\"]\n        bboxes[:, 2:] -= bboxes[:, :2]\n        bboxes = bboxes.tolist()\n        labels = targets['labels'].tolist()\n        areas = targets['area'].tolist()\n        iscrowd = targets['iscrowd'].tolist()\n        num_objs = len(bboxes)\n        for i in range(num_objs):\n            ann = {}\n            ann['image_id'] = image_id\n            ann['bbox'] = bboxes[i]\n            ann['category_id'] = labels[i]\n            categories.add(labels[i])\n            ann['area'] = areas[i]\n            ann['iscrowd'] = iscrowd[i]\n            ann['id'] = ann_id\n            dataset['annotations'].append(ann)\n            ann_id += 1\n    dataset['categories'] = [{'id': i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    coco_ds.createIndex()\n    return coco_ds\n\n\ndef get_coco_api_from_dataset(dataset):\n    for _ in range(10):\n        if isinstance(dataset, torchvision.datasets.CocoDetection):\n            break\n        if isinstance(dataset, torch.utils.data.Subset):\n            dataset = dataset.dataset\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return dataset.coco\n    return convert_to_coco_api(dataset)\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    Run all_gather on arbitrary picklable data (not 
necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()\n    if world_size == 1:\n        return [data]\n\n    # serialized to a Tensor\n    buffer = pickle.dumps(data)\n    storage = torch.ByteStorage.from_buffer(buffer)\n    tensor = torch.ByteTensor(storage).to(\"cuda\")\n\n    # obtain Tensor size of each rank\n    local_size = torch.tensor([tensor.numel()], device=\"cuda\")\n    size_list = [torch.tensor([0], device=\"cuda\") for _ in range(world_size)]\n    dist.all_gather(size_list, local_size)\n    size_list = [int(size.item()) for size in size_list]\n    max_size = max(size_list)\n\n    # receiving Tensor from all ranks\n    # we pad the tensor because torch all_gather does not support\n    # gathering tensors of different shapes\n    tensor_list = []\n    for _ in size_list:\n        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=\"cuda\"))\n    if local_size != max_size:\n        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device=\"cuda\")\n        tensor = torch.cat((tensor, padding), dim=0)\n    dist.all_gather(tensor_list, tensor)\n\n    data_list = []\n    for size, tensor in zip(size_list, tensor_list):\n        buffer = tensor.cpu().numpy().tobytes()[:size]\n        data_list.append(pickle.loads(buffer))\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. 
Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n       
 end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         
eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef 
is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k+1等份\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nfrom .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\nimport train_utils.distributed_utils as utils\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        images = list(image.to(device) for image in images)\n        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            loss_dict = model(images, targets)\n\n            losses = sum(loss for loss in loss_dict.values())\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict(loss_dict)\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n\n        loss_value = losses_reduced.item()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(loss_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            
scaler.scale(losses).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            losses.backward()\n            optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    coco = get_coco_api_from_dataset(data_loader.dataset)\n    iou_types = _get_iou_types(model)\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n\n    for image, targets in metric_logger.log_every(data_loader, 100, header):\n        image = list(img.to(device) for img in image)\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        outputs = model(image)\n\n        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n        model_time = time.time() - model_time\n\n        res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n\n        evaluator_time = time.time()\n        coco_evaluator.update(res)\n        evaluator_time = time.time() - evaluator_time\n        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list\n\n    
return coco_info\n\n\ndef _get_iou_types(model):\n    model_without_ddp = model\n    if isinstance(model, torch.nn.parallel.DistributedDataParallel):\n        model_without_ddp = model.module\n    iou_types = [\"bbox\"]\n    return iou_types\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/transforms.py",
    "content": "import random\nfrom torchvision.transforms import functional as F\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机水平翻转图像以及bboxes\"\"\"\n    def __init__(self, prob=0.5):\n        self.prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.prob:\n            height, width = image.shape[-2:]\n            image = image.flip(-1)  # 水平翻转图片\n            bbox = target[\"boxes\"]\n            # bbox: xmin, ymin, xmax, ymax\n            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            target[\"boxes\"] = bbox\n        return image, target\n"
  },
  {
    "path": "pytorch_object_detection/retinaNet/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nfrom tqdm import tqdm\nimport numpy as np\n\nimport transforms\nfrom network_files import RetinaNet\nfrom backbone import resnet50_fpn_backbone, LastLevelP6P7\nfrom my_dataset import VOCDataSet\nfrom train_utils import get_coco_api_from_dataset, CocoEvaluator\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            
mean_s = -1\n        else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    # read class_indict\n    label_json_path = './pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {v: k for k, v 
in class_dict.items()}\n\n    VOC_root = parser_data.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], \"val.txt\")\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     num_workers=nw,\n                                                     pin_memory=True,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model\n    # 注意，这里的norm_layer要和训练脚本中保持一致\n    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,\n                                     returned_layers=[2, 3, 4],\n                                     extra_blocks=LastLevelP6P7(256, 256))\n    model = RetinaNet(backbone, parser_data.num_classes)\n\n    # 载入你自己训练好的模型权重\n    weights_path = parser_data.weights_path\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    # print(model)\n\n    model.to(device)\n\n    # evaluate on the test dataset\n    coco = get_coco_api_from_dataset(val_dataset)\n    iou_types = [\"bbox\"]\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n    cpu_device = torch.device(\"cpu\")\n\n  
  model.eval()\n    with torch.no_grad():\n        for image, targets in tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            image = list(img.to(device) for img in image)\n\n            # inference\n            outputs = model(image)\n\n            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n            res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n            coco_evaluator.update(res)\n\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_eval = coco_evaluator.coco_eval[\"bbox\"]\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(coco_eval)\n\n    # calculate voc info for every classes(IoU=0.5)\n    voc_map_info_list = []\n    for i in range(len(category_index)):\n        stats, _ = summarize(coco_eval, catId=i)\n        voc_map_info_list.append(\" {:15}: {}\".format(category_index[i], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(\"record_mAP.txt\", \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n\n    # 检测目标类别数\n    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')\n\n    # 数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights-path', 
default='./save_weights/model.pth', type=str, help='training weights')\n\n    # batch size\n    parser.add_argument('--batch_size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/README.md",
    "content": "# SSD: Single Shot MultiBox Detector\n\n## 环境配置：\n* Python 3.6/3.7/3.8\n* Pytorch 1.7.1\n* pycocotools(Linux:```pip install pycocotools```; Windows:```pip install pycocotools-windows```(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n\n## 文件结构：\n```\n├── src: 实现SSD模型的相关模块    \n│     ├── resnet50_backbone.py   使用resnet50网络作为SSD的backbone  \n│     ├── ssd_model.py           SSD网络结构文件 \n│     └── utils.py               训练过程中使用到的一些功能实现\n├── train_utils: 训练验证相关模块（包括cocotools）  \n├── my_dataset.py: 自定义dataset用于读取VOC数据集    \n├── train_ssd300.py: 以resnet50做为backbone的SSD网络进行训练    \n├── train_multi_GPU.py: 针对使用多GPU的用户使用    \n├── predict_test.py: 简易的预测脚本，使用训练好的权重进行预测测试    \n├── pascal_voc_classes.json: pascal_voc标签文件    \n├── plot_curve.py: 用于绘制训练过程的损失以及验证集的mAP\n└── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n```\n\n## 预训练权重下载地址（下载后放入src文件夹中）：\n* ResNet50+SSD: https://ngc.nvidia.com/catalog/models  \n `搜索ssd -> 找到SSD for PyTorch(FP32) -> download FP32 -> 解压文件`\n* 如果找不到可通过百度网盘下载，链接:https://pan.baidu.com/s/1byOnoNuqmBLZMDA0-lbCMQ 提取码:iggj \n\n## 数据集，本例程使用的是PASCAL VOC2012数据集(下载后放入项目当前文件夹中)\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* Pascal VOC2007 test数据集请参考：http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 单GPU训练或CPU，直接使用train_ssd300.py训练脚本\n* 若要使用多GPU训练，使用 \"python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py\" 指令,nproc_per_node参数为使用GPU数量\n* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n\n## 如果对SSD算法原理不是很理解可参考我的bilibili\n* https://www.bilibili.com/video/BV1fT4y1L7Gi\n\n## 进一步了解该项目，以及对SSD算法代码的分析可参考我的bilibili\n* https://www.bilibili.com/video/BV1vK411H771/\n\n## Resnet50 + SSD算法框架图\n![Resnet50 SSD](res50_ssd.png) \n"
  },
  {
    "path": "pytorch_object_detection/ssd/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/ssd/my_dataset.py",
    "content": "from torch.utils.data import Dataset\nimport os\nimport torch\nimport json\nfrom PIL import Image\nfrom lxml import etree\n\n\nclass VOCDataSet(Dataset):\n    \"\"\"读取解析PASCAL VOC2007/2012数据集\"\"\"\n\n    def __init__(self, voc_root, year=\"2012\", transforms=None, train_set='train.txt'):\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        # 增加容错能力\n        if \"VOCdevkit\" in voc_root:\n            self.root = os.path.join(voc_root, f\"VOC{year}\")\n        else:\n            self.root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        self.img_root = os.path.join(self.root, \"JPEGImages\")\n        self.annotations_root = os.path.join(self.root, \"Annotations\")\n\n        txt_list = os.path.join(self.root, \"ImageSets\", \"Main\", train_set)\n\n        with open(txt_list) as read:\n            self.xml_list = [os.path.join(self.annotations_root, line.strip() + \".xml\")\n                             for line in read.readlines() if len(line.strip()) > 0]\n\n        # read class_indict\n        json_file = \"./pascal_voc_classes.json\"\n        assert os.path.exists(json_file), \"{} file not exist.\".format(json_file)\n        with open(json_file, 'r') as f:\n            self.class_dict = json.load(f)\n\n        self.transforms = transforms\n\n    def __len__(self):\n        return len(self.xml_list)\n\n    def __getitem__(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        height_width = [data_height, data_width]\n        img_path = os.path.join(self.img_root, data[\"filename\"])\n        image = Image.open(img_path)\n        if image.format != \"JPEG\":\n            raise ValueError(\"Image 
'{}' format not JPEG\".format(img_path))\n\n        assert \"object\" in data, \"{} lack of object information.\".format(xml_path)\n        boxes = []\n        labels = []\n        iscrowd = []\n        for obj in data[\"object\"]:\n            # 将所有的gt box信息转换成相对值0-1之间\n            xmin = float(obj[\"bndbox\"][\"xmin\"]) / data_width\n            xmax = float(obj[\"bndbox\"][\"xmax\"]) / data_width\n            ymin = float(obj[\"bndbox\"][\"ymin\"]) / data_height\n            ymax = float(obj[\"bndbox\"][\"ymax\"]) / data_height\n\n            # 进一步检查数据，有的标注信息中可能有w或h为0的情况，这样的数据会导致计算回归loss为nan\n            if xmax <= xmin or ymax <= ymin:\n                print(\"Warning: in '{}' xml, there are some bbox w/h <=0\".format(xml_path))\n                continue\n                \n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            if \"difficult\" in obj:\n                iscrowd.append(int(obj[\"difficult\"]))\n            else:\n                iscrowd.append(0)\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        height_width = torch.as_tensor(height_width, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n        target[\"height_width\"] = height_width\n\n        if self.transforms is not None:\n            image, target = self.transforms(image, target)\n\n        return image, target\n\n    def get_height_and_width(self, idx):\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as 
fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        return data_height, data_width\n\n    def parse_xml_to_dict(self, xml):\n        \"\"\"\n        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n        Args：\n            xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n        Returns:\n            Python dictionary holding XML contents.\n        \"\"\"\n\n        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n            return {xml.tag: xml.text}\n\n        result = {}\n        for child in xml:\n            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息\n            if child.tag != 'object':\n                result[child.tag] = child_result[child.tag]\n            else:\n                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                    result[child.tag] = []\n                result[child.tag].append(child_result[child.tag])\n        return {xml.tag: result}\n\n    def coco_index(self, idx):\n        \"\"\"\n        该方法是专门为pycocotools统计标签信息准备，不对图像和标签作任何处理\n        由于不用去读取图片，可大幅缩减统计时间\n\n        Args:\n            idx: 输入需要获取图像的索引\n        \"\"\"\n        # read xml\n        xml_path = self.xml_list[idx]\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = self.parse_xml_to_dict(xml)[\"annotation\"]\n        data_height = int(data[\"size\"][\"height\"])\n        data_width = int(data[\"size\"][\"width\"])\n        height_width = [data_height, data_width]\n        # img_path = os.path.join(self.img_root, data[\"filename\"])\n        # image = Image.open(img_path)\n        # if image.format != \"JPEG\":\n        #     raise ValueError(\"Image format not JPEG\")\n        boxes = []\n        labels = []\n        iscrowd = []\n     
   for obj in data[\"object\"]:\n            # 将所有的gt box信息转换成相对值0-1之间\n            xmin = float(obj[\"bndbox\"][\"xmin\"]) / data_width\n            xmax = float(obj[\"bndbox\"][\"xmax\"]) / data_width\n            ymin = float(obj[\"bndbox\"][\"ymin\"]) / data_height\n            ymax = float(obj[\"bndbox\"][\"ymax\"]) / data_height\n            boxes.append([xmin, ymin, xmax, ymax])\n            labels.append(self.class_dict[obj[\"name\"]])\n            iscrowd.append(int(obj[\"difficult\"]))\n\n        # convert everything into a torch.Tensor\n        boxes = torch.as_tensor(boxes, dtype=torch.float32)\n        labels = torch.as_tensor(labels, dtype=torch.int64)\n        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)\n        height_width = torch.as_tensor(height_width, dtype=torch.int64)\n        image_id = torch.tensor([idx])\n        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = labels\n        target[\"image_id\"] = image_id\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n        target[\"height_width\"] = height_width\n\n        return target\n\n    @staticmethod\n    def collate_fn(batch):\n        images, targets = tuple(zip(*batch))\n        # images = torch.stack(images, dim=0)\n        #\n        # boxes = []\n        # labels = []\n        # img_id = []\n        # for t in targets:\n        #     boxes.append(t['boxes'])\n        #     labels.append(t['labels'])\n        #     img_id.append(t[\"image_id\"])\n        # targets = {\"boxes\": torch.stack(boxes, dim=0),\n        #            \"labels\": torch.stack(labels, dim=0),\n        #            \"image_id\": torch.as_tensor(img_id)}\n\n        return images, targets\n\n# import transforms\n# from draw_box_utils import draw_objs\n# from PIL import Image\n# import json\n# import matplotlib.pyplot as plt\n# import torchvision.transforms as ts\n# import 
random\n#\n# # read class_indict\n# category_index = {}\n# try:\n#     json_file = open('./pascal_voc_classes.json', 'r')\n#     class_dict = json.load(json_file)\n#     category_index = {str(v): str(k) for k, v in class_dict.items()}\n# except Exception as e:\n#     print(e)\n#     exit(-1)\n#\n# data_transform = {\n#     \"train\": transforms.Compose([transforms.ToTensor(),\n#                                  transforms.RandomHorizontalFlip(0.5)]),\n#     \"val\": transforms.Compose([transforms.ToTensor()])\n# }\n#\n# # load train data set\n# train_data_set = VOCDataSet(os.getcwd(), \"2012\", data_transform[\"train\"], \"train.txt\")\n# print(len(train_data_set))\n# for index in random.sample(range(0, len(train_data_set)), k=5):\n#     img, target = train_data_set[index]\n#     img = ts.ToPILImage()(img)\n#     plot_img = draw_objs(img,\n#                          target[\"boxes\"].numpy(),\n#                          target[\"labels\"].numpy(),\n#                          np.ones(target[\"labels\"].shape[0]),\n#                          category_index=category_index,\n#                          box_thresh=0.5,\n#                          line_thickness=3,\n#                          font='arial.ttf',\n#                          font_size=20)\n#     plt.imshow(plot_img)\n#     plt.show()\n"
  },
  {
    "path": "pytorch_object_detection/ssd/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 1,\n    \"bicycle\": 2,\n    \"bird\": 3,\n    \"boat\": 4,\n    \"bottle\": 5,\n    \"bus\": 6,\n    \"car\": 7,\n    \"cat\": 8,\n    \"chair\": 9,\n    \"cow\": 10,\n    \"diningtable\": 11,\n    \"dog\": 12,\n    \"horse\": 13,\n    \"motorbike\": 14,\n    \"person\": 15,\n    \"pottedplant\": 16,\n    \"sheep\": 17,\n    \"sofa\": 18,\n    \"train\": 19,\n    \"tvmonitor\": 20\n}"
  },
  {
    "path": "pytorch_object_detection/ssd/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"epoch\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/predict_test.py",
    "content": "import os\nimport json\nimport time\n\nimport torch\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nimport transforms\nfrom src import SSD300, Backbone\nfrom draw_box_utils import draw_objs\n\n\ndef create_model(num_classes):\n    backbone = Backbone()\n    model = SSD300(backbone=backbone, num_classes=num_classes)\n\n    return model\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(device)\n\n    # create model\n    # 目标检测数 + 背景\n    num_classes = 20 + 1\n    model = create_model(num_classes=num_classes)\n\n    # load train weights\n    weights_path = \"./save_weights/ssd300-14.pth\"\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # read class_indict\n    json_path = \"./pascal_voc_classes.json\"\n    assert os.path.exists(json_path), \"file '{}' dose not exist.\".format(json_path)\n    json_file = open(json_path, 'r')\n    class_dict = json.load(json_file)\n    json_file.close()\n    category_index = {str(v): str(k) for k, v in class_dict.items()}\n\n    # load image\n    original_img = Image.open(\"./test.jpg\")\n\n    # from pil image to tensor, do not normalize image\n    data_transform = transforms.Compose([transforms.Resize(),\n                                         transforms.ToTensor(),\n                                         transforms.Normalization()])\n    img, _ = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()\n    with torch.no_grad():\n        # initial model\n        init_img = torch.zeros((1, 3, 300, 300), device=device)\n        model(init_img)\n\n        time_start = 
time_synchronized()\n        predictions = model(img.to(device))[0]  # bboxes_out, labels_out, scores_out\n        time_end = time_synchronized()\n        print(\"inference+NMS time: {}\".format(time_end - time_start))\n\n        predict_boxes = predictions[0].to(\"cpu\").numpy()\n        predict_boxes[:, [0, 2]] = predict_boxes[:, [0, 2]] * original_img.size[0]\n        predict_boxes[:, [1, 3]] = predict_boxes[:, [1, 3]] * original_img.size[1]\n        predict_classes = predictions[1].to(\"cpu\").numpy()\n        predict_scores = predictions[2].to(\"cpu\").numpy()\n\n        if len(predict_boxes) == 0:\n            print(\"没有检测到任何目标!\")\n\n        plot_img = draw_objs(original_img,\n                             predict_boxes,\n                             predict_classes,\n                             predict_scores,\n                             category_index=category_index,\n                             box_thresh=0.5,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/ssd/record_mAP.txt",
    "content": "COCO results:\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.448\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.721\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.482\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.099\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.280\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.418\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.565\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.573\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641\n\nmAP(IoU=0.5) for each category:\n aeroplane      : 0.8532360243584314\n bicycle        : 0.7496603797780927\n bird           : 0.7658478672087958\n boat           : 0.6079142920471263\n bottle         : 0.4986565020053691\n bus            : 0.8229568428349553\n car            : 0.7940868387465018\n cat            : 0.8800145761338203\n chair          : 0.5090524550010037\n cow            : 0.7344958411899583\n diningtable    : 0.5379541883401677\n dog            : 0.8230037525430133\n horse          : 0.7880475852689804\n motorbike      : 0.7879788462924051\n person         : 0.8351553291238482\n pottedplant    : 0.4420858247895347\n sheep          : 0.7466344247593008\n sofa           : 0.6627392793997164\n train          : 0.8380502070312741\n tvmonitor      : 0.7445168617489237"
  },
  {
    "path": "pytorch_object_detection/ssd/requirements.txt",
    "content": "numpy\nmatplotlib\ntqdm\npycocotools\ntorch==1.7.1\ntorchvision==0.8.2\nlxml\nPillow\n"
  },
  {
    "path": "pytorch_object_detection/ssd/src/__init__.py",
    "content": "from .res50_backbone import resnet50\nfrom .ssd_model import SSD300, Backbone\nfrom .utils import dboxes300_coco, calc_iou_tensor, Encoder, PostProcess\n"
  },
  {
    "path": "pytorch_object_detection/ssd/src/res50_backbone.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = 
nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet50(num_classes=1000, include_top=True):\n    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/src/ssd_model.py",
    "content": "import torch\nfrom torch import nn, Tensor\nfrom torch.jit.annotations import List\n\nfrom .res50_backbone import resnet50\nfrom .utils import dboxes300_coco, Encoder, PostProcess\n\n\nclass Backbone(nn.Module):\n    def __init__(self, pretrain_path=None):\n        super(Backbone, self).__init__()\n        net = resnet50()\n        self.out_channels = [1024, 512, 512, 256, 256, 256]\n\n        if pretrain_path is not None:\n            net.load_state_dict(torch.load(pretrain_path))\n\n        self.feature_extractor = nn.Sequential(*list(net.children())[:7])\n\n        conv4_block1 = self.feature_extractor[-1][0]\n\n        # 修改conv4_block1的步距，从2->1\n        conv4_block1.conv1.stride = (1, 1)\n        conv4_block1.conv2.stride = (1, 1)\n        conv4_block1.downsample[0].stride = (1, 1)\n\n    def forward(self, x):\n        x = self.feature_extractor(x)\n        return x\n\n\nclass SSD300(nn.Module):\n    def __init__(self, backbone=None, num_classes=21):\n        super(SSD300, self).__init__()\n        if backbone is None:\n            raise Exception(\"backbone is None\")\n        if not hasattr(backbone, \"out_channels\"):\n            raise Exception(\"the backbone not has attribute: out_channel\")\n        self.feature_extractor = backbone\n\n        self.num_classes = num_classes\n        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50\n        self._build_additional_features(self.feature_extractor.out_channels)\n        self.num_defaults = [4, 6, 6, 6, 4, 4]\n        location_extractors = []\n        confidence_extractors = []\n\n        # out_channels = [1024, 512, 512, 256, 256, 256] for resnet50\n        for nd, oc in zip(self.num_defaults, self.feature_extractor.out_channels):\n            # nd is number_default_boxes, oc is output_channel\n            location_extractors.append(nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))\n            confidence_extractors.append(nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, 
padding=1))\n\n        self.loc = nn.ModuleList(location_extractors)\n        self.conf = nn.ModuleList(confidence_extractors)\n        self._init_weights()\n\n        default_box = dboxes300_coco()\n        self.compute_loss = Loss(default_box)\n        self.encoder = Encoder(default_box)\n        self.postprocess = PostProcess(default_box)\n\n    def _build_additional_features(self, input_size):\n        \"\"\"\n        为backbone(resnet50)添加额外的一系列卷积层，得到相应的一系列特征提取器\n        :param input_size:\n        :return:\n        \"\"\"\n        additional_blocks = []\n        # input_size = [1024, 512, 512, 256, 256, 256] for resnet50\n        middle_channels = [256, 256, 128, 128, 128]\n        for i, (input_ch, output_ch, middle_ch) in enumerate(zip(input_size[:-1], input_size[1:], middle_channels)):\n            padding, stride = (1, 2) if i < 3 else (0, 1)\n            layer = nn.Sequential(\n                nn.Conv2d(input_ch, middle_ch, kernel_size=1, bias=False),\n                nn.BatchNorm2d(middle_ch),\n                nn.ReLU(inplace=True),\n                nn.Conv2d(middle_ch, output_ch, kernel_size=3, padding=padding, stride=stride, bias=False),\n                nn.BatchNorm2d(output_ch),\n                nn.ReLU(inplace=True),\n            )\n            additional_blocks.append(layer)\n        self.additional_blocks = nn.ModuleList(additional_blocks)\n\n    def _init_weights(self):\n        layers = [*self.additional_blocks, *self.loc, *self.conf]\n        for layer in layers:\n            for param in layer.parameters():\n                if param.dim() > 1:\n                    nn.init.xavier_uniform_(param)\n\n    # Shape the classifier to the view of bboxes\n    def bbox_view(self, features, loc_extractor, conf_extractor):\n        locs = []\n        confs = []\n        for f, l, c in zip(features, loc_extractor, conf_extractor):\n            # [batch, n*4, feat_size, feat_size] -> [batch, 4, -1]\n            locs.append(l(f).view(f.size(0), 4, -1))\n     
       # [batch, n*classes, feat_size, feat_size] -> [batch, classes, -1]\n            confs.append(c(f).view(f.size(0), self.num_classes, -1))\n\n        locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous()\n        return locs, confs\n\n    def forward(self, image, targets=None):\n        x = self.feature_extractor(image)\n\n        # Feature Map 38x38x1024, 19x19x512, 10x10x512, 5x5x256, 3x3x256, 1x1x256\n        detection_features = torch.jit.annotate(List[Tensor], [])  # [x]\n        detection_features.append(x)\n        for layer in self.additional_blocks:\n            x = layer(x)\n            detection_features.append(x)\n\n        # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4\n        locs, confs = self.bbox_view(detection_features, self.loc, self.conf)\n\n        # For SSD 300, shall return nbatch x 8732 x {nlabels, nlocs} results\n        # 38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732\n\n        if self.training:\n            if targets is None:\n                raise ValueError(\"In training mode, targets should be passed\")\n            # bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)\n            bboxes_out = targets['boxes']\n            bboxes_out = bboxes_out.transpose(1, 2).contiguous()\n            # print(bboxes_out.is_contiguous())\n            labels_out = targets['labels']\n            # print(labels_out.is_contiguous())\n\n            # ploc, plabel, gloc, glabel\n            loss = self.compute_loss(locs, confs, bboxes_out, labels_out)\n            return {\"total_losses\": loss}\n\n        # 将预测回归参数叠加到default box上得到最终预测box，并执行非极大值抑制虑除重叠框\n        # results = self.encoder.decode_batch(locs, confs)\n        results = self.postprocess(locs, confs)\n        return results\n\n\nclass Loss(nn.Module):\n    \"\"\"\n        Implements the loss as the sum of the followings:\n        1. Confidence Loss: All labels, with hard negative mining\n        2. 
Localization Loss: Only on positive labels\n        Suppose input dboxes has the shape 8732x4\n    \"\"\"\n    def __init__(self, dboxes):\n        super(Loss, self).__init__()\n        # Two factor are from following links\n        # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html\n        self.scale_xy = 1.0 / dboxes.scale_xy  # 10\n        self.scale_wh = 1.0 / dboxes.scale_wh  # 5\n\n        self.location_loss = nn.SmoothL1Loss(reduction='none')\n        # [num_anchors, 4] -> [4, num_anchors] -> [1, 4, num_anchors]\n        self.dboxes = nn.Parameter(dboxes(order=\"xywh\").transpose(0, 1).unsqueeze(dim=0),\n                                   requires_grad=False)\n\n        self.confidence_loss = nn.CrossEntropyLoss(reduction='none')\n\n    def _location_vec(self, loc):\n        # type: (Tensor) -> Tensor\n        \"\"\"\n        Generate Location Vectors\n        计算ground truth相对anchors的回归参数\n        :param loc: anchor匹配到的对应GTBOX Nx4x8732\n        :return:\n        \"\"\"\n        gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :]  # Nx2x8732\n        gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log()  # Nx2x8732\n        return torch.cat((gxy, gwh), dim=1).contiguous()\n\n    def forward(self, ploc, plabel, gloc, glabel):\n        # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor\n        \"\"\"\n            ploc, plabel: Nx4x8732, Nxlabel_numx8732\n                predicted location and labels\n\n            gloc, glabel: Nx4x8732, Nx8732\n                ground truth location and labels\n        \"\"\"\n        # 获取正样本的mask  Tensor: [N, 8732]\n        mask = torch.gt(glabel, 0)  # (gt: >)\n        # mask1 = torch.nonzero(glabel)\n        # 计算一个batch中的每张图片的正样本个数 Tensor: [N]\n        pos_num = mask.sum(dim=1)\n\n        # 计算gt的location回归参数 Tensor: [N, 4, 8732]\n        vec_gd = self._location_vec(gloc)\n\n        # sum on four coordinates, and mask\n        # 
计算定位损失(只有正样本)\n        loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1)  # Tensor: [N, 8732]\n        loc_loss = (mask.float() * loc_loss).sum(dim=1)  # Tensor: [N]\n\n        # hard negative mining Tensor: [N, 8732]\n        con = self.confidence_loss(plabel, glabel)\n\n        # positive mask will never be selected\n        # 获取负样本\n        con_neg = con.clone()\n        con_neg[mask] = 0.0\n        # 按照confidence_loss降序排列 con_idx(Tensor: [N, 8732])\n        _, con_idx = con_neg.sort(dim=1, descending=True)\n        _, con_rank = con_idx.sort(dim=1)  # 这个步骤比较巧妙\n\n        # number of negative three times positive\n        # 用于损失计算的负样本数是正样本的3倍（在原论文Hard negative mining部分），\n        # 但不能超过总样本数8732\n        neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)\n        neg_mask = torch.lt(con_rank, neg_num)  # (lt: <) Tensor [N, 8732]\n\n        # confidence最终loss使用选取的正样本loss+选取的负样本loss\n        con_loss = (con * (mask.float() + neg_mask.float())).sum(dim=1)  # Tensor [N]\n\n        # avoid no object detected\n        # 避免出现图像中没有GTBOX的情况\n        total_loss = loc_loss + con_loss\n        # eg. [15, 3, 5, 0] -> [1.0, 1.0, 1.0, 0.0]\n        num_mask = torch.gt(pos_num, 0).float()  # 统计一个batch中的每张图像中是否存在正样本\n        pos_num = pos_num.float().clamp(min=1e-6)  # 防止出现分母为零的情况\n        ret = (total_loss * num_mask / pos_num).mean(dim=0)  # 只计算存在正样本的图像损失\n        return ret\n\n"
  },
  {
    "path": "pytorch_object_detection/ssd/src/utils.py",
    "content": "from math import sqrt\nimport itertools\n\nimport torch\nimport torch.nn.functional as F\nfrom torch.jit.annotations import Tuple, List\nfrom torch import nn, Tensor\nimport numpy as np\n\n\n# This function is from https://github.com/kuangliu/pytorch-ssd.\n# def calc_iou_tensor(box1, box2):\n#     \"\"\" Calculation of IoU based on two boxes tensor,\n#         Reference to https://github.com/kuangliu/pytorch-src\n#         input:\n#             box1 (N, 4)  format [xmin, ymin, xmax, ymax]\n#             box2 (M, 4)  format [xmin, ymin, xmax, ymax]\n#         output:\n#             IoU (N, M)\n#     \"\"\"\n#     N = box1.size(0)\n#     M = box2.size(0)\n#\n#     # (N, 4) -> (N, 1, 4) -> (N, M, 4)\n#     be1 = box1.unsqueeze(1).expand(-1, M, -1)  # -1 means not changing the size of that dimension\n#     # (M, 4) -> (1, M, 4) -> (N, M, 4)\n#     be2 = box2.unsqueeze(0).expand(N, -1, -1)\n#\n#     # Left Top and Right Bottom\n#     lt = torch.max(be1[:, :, :2], be2[:, :, :2])\n#     rb = torch.min(be1[:, :, 2:], be2[:, :, 2:])\n#\n#     # compute intersection area\n#     delta = rb - lt  # width and height\n#     delta[delta < 0] = 0\n#     # width * height\n#     intersect = delta[:, :, 0] * delta[:, :, 1]\n#\n#     # compute bel1 area\n#     delta1 = be1[:, :, 2:] - be1[:, :, :2]\n#     area1 = delta1[:, :, 0] * delta1[:, :, 1]\n#     # compute bel2 area\n#     delta2 = be2[:, :, 2:] - be2[:, :, :2]\n#     area2 = delta2[:, :, 0] * delta2[:, :, 1]\n#\n#     iou = intersect / (area1 + area2 - intersect)\n#     return iou\n\n\ndef box_area(boxes):\n    \"\"\"\n    Computes the area of a set of bounding boxes, which are specified by its\n    (x1, y1, x2, y2) coordinates.\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes for which the area will be computed. 
They\n            are expected to be in (x1, y1, x2, y2) format\n\n    Returns:\n        area (Tensor[N]): area for each box\n    \"\"\"\n    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n\n\ndef calc_iou_tensor(boxes1, boxes2):\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n\n    Arguments:\n        boxes1 (Tensor[N, 4])\n        boxes2 (Tensor[M, 4])\n\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n    area1 = box_area(boxes1)\n    area2 = box_area(boxes2)\n\n    #  When the shapes do not match,\n    #  the shape of the returned output tensor follows the broadcasting rules\n    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # left-top [N,M,2]\n    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # right-bottom [N,M,2]\n\n    wh = (rb - lt).clamp(min=0)  # [N,M,2]\n    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]\n\n    iou = inter / (area1[:, None] + area2 - inter)\n    return iou\n\n\n# This function is from https://github.com/kuangliu/pytorch-ssd.\nclass Encoder(object):\n    \"\"\"\n        Inspired by https://github.com/kuangliu/pytorch-src\n        Transform between (bboxes, lables) <-> SSD output\n\n        dboxes: default boxes in size 8732 x 4,\n            encoder: input ltrb format, output xywh format\n            decoder: input xywh format, output ltrb format\n\n        encode:\n            input  : bboxes_in (Tensor nboxes x 4), labels_in (Tensor nboxes)\n            output : bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)\n            criteria : IoU threshold of bboexes\n\n        decode:\n            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)\n            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)\n            criteria : IoU threshold of bboexes\n           
 max_output : maximum number of output bboxes\n    \"\"\"\n    def __init__(self, dboxes):\n        self.dboxes = dboxes(order='ltrb')\n        self.dboxes_xywh = dboxes(order='xywh').unsqueeze(dim=0)\n        self.nboxes = self.dboxes.size(0)  # default boxes的数量\n        self.scale_xy = dboxes.scale_xy\n        self.scale_wh = dboxes.scale_wh\n\n    def encode(self, bboxes_in, labels_in, criteria=0.5):\n        \"\"\"\n        encode:\n            input  : bboxes_in (Tensor nboxes x 4), labels_in (Tensor nboxes)\n            output : bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)\n            criteria : IoU threshold of bboexes\n        \"\"\"\n        # [nboxes, 8732]\n        ious = calc_iou_tensor(bboxes_in, self.dboxes)  # 计算每个GT与default box的iou\n        # [8732,]\n        best_dbox_ious, best_dbox_idx = ious.max(dim=0)  # 寻找每个default box匹配到的最大IoU\n        # [nboxes,]\n        best_bbox_ious, best_bbox_idx = ious.max(dim=1)  # 寻找每个GT匹配到的最大IoU\n\n        # 将每个GT匹配到的最佳default box设置为正样本（对应论文中Matching strategy的第一条）\n        # set best ious 2.0\n        best_dbox_ious.index_fill_(0, best_bbox_idx, 2.0)  # dim, index, value\n        # 将相应default box匹配最大IOU的GT索引进行替换\n        idx = torch.arange(0, best_bbox_idx.size(0), dtype=torch.int64)\n        best_dbox_idx[best_bbox_idx[idx]] = idx\n\n        # filter IoU > 0.5\n        # 寻找与GT iou大于0.5的default box,对应论文中Matching strategy的第二条(这里包括了第一条匹配到的信息)\n        masks = best_dbox_ious > criteria\n        # [8732,]\n        labels_out = torch.zeros(self.nboxes, dtype=torch.int64)\n        labels_out[masks] = labels_in[best_dbox_idx[masks]]\n        # 将default box匹配到正样本的位置设置成对应GT的box信息\n        bboxes_out = self.dboxes.clone()\n        bboxes_out[masks, :] = bboxes_in[best_dbox_idx[masks], :]\n\n        # Transform format to xywh format\n        x = 0.5 * (bboxes_out[:, 0] + bboxes_out[:, 2])  # x\n        y = 0.5 * (bboxes_out[:, 1] + bboxes_out[:, 3])  # y\n        w = bboxes_out[:, 2] - bboxes_out[:, 0]  # w\n        
h = bboxes_out[:, 3] - bboxes_out[:, 1]  # h\n        bboxes_out[:, 0] = x\n        bboxes_out[:, 1] = y\n        bboxes_out[:, 2] = w\n        bboxes_out[:, 3] = h\n        return bboxes_out, labels_out\n\n    def scale_back_batch(self, bboxes_in, scores_in):\n        \"\"\"\n            将box格式从xywh转换回ltrb, 将预测目标score通过softmax处理\n            Do scale and transform from xywh to ltrb\n            suppose input N x 4 x num_bbox | N x label_num x num_bbox\n\n            bboxes_in: 是网络预测的xywh回归参数\n            scores_in: 是预测的每个default box的各目标概率\n        \"\"\"\n        if bboxes_in.device == torch.device(\"cpu\"):\n            self.dboxes = self.dboxes.cpu()\n            self.dboxes_xywh = self.dboxes_xywh.cpu()\n        else:\n            self.dboxes = self.dboxes.cuda()\n            self.dboxes_xywh = self.dboxes_xywh.cuda()\n\n        # Returns a view of the original tensor with its dimensions permuted.\n        bboxes_in = bboxes_in.permute(0, 2, 1)\n        scores_in = scores_in.permute(0, 2, 1)\n        # print(bboxes_in.is_contiguous())\n\n        bboxes_in[:, :, :2] = self.scale_xy * bboxes_in[:, :, :2]   # 预测的x, y回归参数\n        bboxes_in[:, :, 2:] = self.scale_wh * bboxes_in[:, :, 2:]   # 预测的w, h回归参数\n\n        # 将预测的回归参数叠加到default box上得到最终的预测边界框\n        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]\n        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, :, 2:]\n\n        # transform format to ltrb\n        l = bboxes_in[:, :, 0] - 0.5 * bboxes_in[:, :, 2]\n        t = bboxes_in[:, :, 1] - 0.5 * bboxes_in[:, :, 3]\n        r = bboxes_in[:, :, 0] + 0.5 * bboxes_in[:, :, 2]\n        b = bboxes_in[:, :, 1] + 0.5 * bboxes_in[:, :, 3]\n\n        bboxes_in[:, :, 0] = l  # xmin\n        bboxes_in[:, :, 1] = t  # ymin\n        bboxes_in[:, :, 2] = r  # xmax\n        bboxes_in[:, :, 3] = b  # ymax\n\n        return bboxes_in, F.softmax(scores_in, dim=-1)\n\n    def decode_batch(self, bboxes_in, 
scores_in, criteria=0.45, max_output=200):\n        # 将box格式从xywh转换回ltrb（方便后面非极大值抑制时求iou）, 将预测目标score通过softmax处理\n        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)\n\n        outputs = []\n        # 遍历一个batch中的每张image数据\n        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):\n            bbox = bbox.squeeze(0)\n            prob = prob.squeeze(0)\n            outputs.append(self.decode_single_new(bbox, prob, criteria, max_output))\n        return outputs\n\n    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output=200):\n        \"\"\"\n        decode:\n            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)\n            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)\n            criteria : IoU threshold of bboexes\n            max_output : maximum number of output bboxes\n        \"\"\"\n        device = bboxes_in.device\n        num_classes = scores_in.shape[-1]\n\n        # 对越界的bbox进行裁剪\n        bboxes_in = bboxes_in.clamp(min=0, max=1)\n\n        # [8732, 4] -> [8732, 21, 4]\n        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)\n\n        # create labels for each prediction\n        labels = torch.arange(num_classes, device=device)\n        labels = labels.view(1, -1).expand_as(scores_in)\n\n        # remove prediction with the background label\n        # 移除归为背景类别的概率信息\n        bboxes_in = bboxes_in[:, 1:, :]\n        scores_in = scores_in[:, 1:]\n        labels = labels[:, 1:]\n\n        # batch everything, by making every class prediction be a separate instance\n        bboxes_in = bboxes_in.reshape(-1, 4)\n        scores_in = scores_in.reshape(-1)\n        labels = labels.reshape(-1)\n\n        # remove low scoring boxes\n        # 移除低概率目标，self.scores_thresh=0.05\n        inds = torch.nonzero(scores_in > 0.05, as_tuple=False).squeeze(1)\n        bboxes_in, scores_in, labels = bboxes_in[inds], scores_in[inds], labels[inds]\n\n    
    # remove empty boxes\n        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]\n        keep = (ws >= 0.1 / 300) & (hs >= 0.1 / 300)\n        keep = keep.nonzero(as_tuple=False).squeeze(1)\n        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]\n\n        # non-maximum suppression\n        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)\n\n        # keep only topk scoring predictions\n        keep = keep[:num_output]\n        bboxes_out = bboxes_in[keep, :]\n        scores_out = scores_in[keep]\n        labels_out = labels[keep]\n\n        return bboxes_out, labels_out, scores_out\n\n    # perform non-maximum suppression\n    def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200):\n        \"\"\"\n        decode:\n            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)\n            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)\n            criteria : IoU threshold of bboexes\n            max_output : maximum number of output bboxes\n        \"\"\"\n        # Reference to https://github.com/amdegroot/ssd.pytorch\n        bboxes_out = []\n        scores_out = []\n        labels_out = []\n\n        # 非极大值抑制算法\n        # scores_in (Tensor 8732 x nitems), 遍历返回每一列数据，即8732个目标的同一类别的概率\n        for i, score in enumerate(scores_in.split(1, 1)):\n            # skip background\n            if i == 0:\n                continue\n\n            # [8732, 1] -> [8732]\n            score = score.squeeze(1)\n\n            # 虑除预测概率小于0.05的目标\n            mask = score > 0.05\n            bboxes, score = bboxes_in[mask, :], score[mask]\n            if score.size(0) == 0:\n                continue\n\n            # 按照分数从小到大排序\n            score_sorted, score_idx_sorted = score.sort(dim=0)\n\n            # select max_output indices\n            score_idx_sorted = score_idx_sorted[-max_num:]\n            candidates = []\n\n         
   while score_idx_sorted.numel() > 0:\n                idx = score_idx_sorted[-1].item()\n                # 获取排名前score_idx_sorted名的bboxes信息 Tensor:[score_idx_sorted, 4]\n                bboxes_sorted = bboxes[score_idx_sorted, :]\n                # 获取排名第一的bboxes信息 Tensor:[4]\n                bboxes_idx = bboxes[idx, :].unsqueeze(dim=0)\n                # 计算前score_idx_sorted名的bboxes与第一名的bboxes的iou\n                iou_sorted = calc_iou_tensor(bboxes_sorted, bboxes_idx).squeeze()\n\n                # we only need iou < criteria\n                # 丢弃与第一名iou > criteria的所有目标(包括自己本身)\n                score_idx_sorted = score_idx_sorted[iou_sorted < criteria]\n                # 保存第一名的索引信息\n                candidates.append(idx)\n\n            # 保存该类别通过非极大值抑制后的目标信息\n            bboxes_out.append(bboxes[candidates, :])   # bbox坐标信息\n            scores_out.append(score[candidates])       # score信息\n            labels_out.extend([i] * len(candidates))   # 标签信息\n\n        if not bboxes_out:  # 如果为空的话，返回空tensor，注意boxes对应的空tensor size，防止验证时出错\n            return [torch.empty(size=(0, 4)), torch.empty(size=(0,), dtype=torch.int64), torch.empty(size=(0,))]\n\n        bboxes_out = torch.cat(bboxes_out, dim=0).contiguous()\n        scores_out = torch.cat(scores_out, dim=0).contiguous()\n        labels_out = torch.as_tensor(labels_out, dtype=torch.long)\n\n        # 对所有目标的概率进行排序（无论是什 么类别）,取前max_num个目标\n        _, max_ids = scores_out.sort(dim=0)\n        max_ids = max_ids[-max_output:]\n        return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]\n\n\nclass DefaultBoxes(object):\n    def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0.1, scale_wh=0.2):\n        self.fig_size = fig_size   # 输入网络的图像大小 300\n        # [38, 19, 10, 5, 3, 1]\n        self.feat_size = feat_size  # 每个预测层的feature map尺寸\n\n        self.scale_xy_ = scale_xy\n        self.scale_wh_ = scale_wh\n\n        # According to https://github.com/weiliu89/caffe\n        # 
Calculation method slightly different from paper\n        # [8, 16, 32, 64, 100, 300]\n        self.steps = steps    # 每个特征层上的一个cell在原图上的跨度\n\n        # [21, 45, 99, 153, 207, 261, 315]\n        self.scales = scales  # 每个特征层上预测的default box的scale\n\n        fk = fig_size / np.array(steps)     # 计算每层特征层的fk\n        # [[2], [2, 3], [2, 3], [2, 3], [2], [2]]\n        self.aspect_ratios = aspect_ratios  # 每个预测特征层上预测的default box的ratios\n\n        self.default_boxes = []\n        # size of feature and number of feature\n        # 遍历每层特征层，计算default box\n        for idx, sfeat in enumerate(self.feat_size):\n            sk1 = scales[idx] / fig_size  # scale转为相对值[0-1]\n            sk2 = scales[idx + 1] / fig_size  # scale转为相对值[0-1]\n            sk3 = sqrt(sk1 * sk2)\n            # 先添加两个1:1比例的default box宽和高\n            all_sizes = [(sk1, sk1), (sk3, sk3)]\n\n            # 再将剩下不同比例的default box宽和高添加到all_sizes中\n            for alpha in aspect_ratios[idx]:\n                w, h = sk1 * sqrt(alpha), sk1 / sqrt(alpha)\n                all_sizes.append((w, h))\n                all_sizes.append((h, w))\n\n            # 计算当前特征层对应原图上的所有default box\n            for w, h in all_sizes:\n                for i, j in itertools.product(range(sfeat), repeat=2):  # i -> 行（y）， j -> 列（x）\n                    # 计算每个default box的中心坐标（范围是在0-1之间）\n                    cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]\n                    self.default_boxes.append((cx, cy, w, h))\n\n        # 将default_boxes转为tensor格式\n        self.dboxes = torch.as_tensor(self.default_boxes, dtype=torch.float32)  # 这里不转类型会报错\n        self.dboxes.clamp_(min=0, max=1)  # 将坐标（x, y, w, h）都限制在0-1之间\n\n        # For IoU calculation\n        # ltrb is left top coordinate and right bottom coordinate\n        # 将(x, y, w, h)转换成(xmin, ymin, xmax, ymax)，方便后续计算IoU(匹配正负样本时)\n        self.dboxes_ltrb = self.dboxes.clone()\n        self.dboxes_ltrb[:, 0] = self.dboxes[:, 0] - 0.5 * self.dboxes[:, 2]   # xmin\n        
self.dboxes_ltrb[:, 1] = self.dboxes[:, 1] - 0.5 * self.dboxes[:, 3]   # ymin\n        self.dboxes_ltrb[:, 2] = self.dboxes[:, 0] + 0.5 * self.dboxes[:, 2]   # xmax\n        self.dboxes_ltrb[:, 3] = self.dboxes[:, 1] + 0.5 * self.dboxes[:, 3]   # ymax\n\n    @property\n    def scale_xy(self):\n        return self.scale_xy_\n\n    @property\n    def scale_wh(self):\n        return self.scale_wh_\n\n    def __call__(self, order='ltrb'):\n        # 根据需求返回对应格式的default box\n        if order == 'ltrb':\n            return self.dboxes_ltrb\n\n        if order == 'xywh':\n            return self.dboxes\n\n\ndef dboxes300_coco():\n    figsize = 300  # 输入网络的图像大小\n    feat_size = [38, 19, 10, 5, 3, 1]   # 每个预测层的feature map尺寸\n    steps = [8, 16, 32, 64, 100, 300]   # 每个特征层上的一个cell在原图上的跨度\n    # use the scales here: https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py\n    scales = [21, 45, 99, 153, 207, 261, 315]  # 每个特征层上预测的default box的scale\n    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]  # 每个预测特征层上预测的default box的ratios\n    dboxes = DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios)\n    return dboxes\n\n\ndef nms(boxes, scores, iou_threshold):\n    # type: (Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression (NMS) on the boxes according\n    to their intersection-over-union (IoU).\n\n    NMS iteratively removes lower scoring boxes which have an\n    IoU greater than iou_threshold with another (higher scoring)\n    box.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes to perform NMS on. 
They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    iou_threshold : float\n        discards all overlapping\n        boxes with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices\n        of the elements that have been kept\n        by NMS, sorted in decreasing order of scores\n    \"\"\"\n    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n\n\ndef batched_nms(boxes, scores, idxs, iou_threshold):\n    # type: (Tensor, Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression in a batched fashion.\n\n    Each index value corresponds to a category, and NMS\n    will not be applied between elements of different categories.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes where NMS will be performed. They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    idxs : Tensor[N]\n        indices of the categories for each one of the boxes.\n    iou_threshold : float\n        discards all overlapping boxes\n        with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices of\n        the elements that have been kept by NMS, sorted\n        in decreasing order of scores\n    \"\"\"\n    if boxes.numel() == 0:\n        return torch.empty((0,), dtype=torch.int64, device=boxes.device)\n\n    # strategy: in order to perform NMS independently per class.\n    # we add an offset to all the boxes. 
The offset is dependent\n    # only on the class idx, and is large enough so that boxes\n    # from different classes do not overlap\n    # 获取所有boxes中最大的坐标值（xmin, ymin, xmax, ymax）\n    max_coordinate = boxes.max()\n\n    # to(): Performs Tensor dtype and/or device conversion\n    # 为每一个类别生成一个很大的偏移量\n    # 这里的to只是让生成tensor的dytpe和device与boxes保持一致\n    offsets = idxs.to(boxes) * (max_coordinate + 1)\n    # boxes加上对应层的偏移量后，保证不同类别之间boxes不会有重合的现象\n    boxes_for_nms = boxes + offsets[:, None]\n    keep = nms(boxes_for_nms, scores, iou_threshold)\n    return keep\n\n\nclass PostProcess(nn.Module):\n    def __init__(self, dboxes):\n        super(PostProcess, self).__init__()\n        # [num_anchors, 4] -> [1, num_anchors, 4]\n        self.dboxes_xywh = nn.Parameter(dboxes(order='xywh').unsqueeze(dim=0),\n                                        requires_grad=False)\n        self.scale_xy = dboxes.scale_xy  # 0.1\n        self.scale_wh = dboxes.scale_wh  # 0.2\n\n        self.criteria = 0.5\n        self.max_output = 100\n\n    def scale_back_batch(self, bboxes_in, scores_in):\n        # type: (Tensor, Tensor) -> Tuple[Tensor, Tensor]\n        \"\"\"\n            1）通过预测的boxes回归参数得到最终预测坐标\n            2）将box格式从xywh转换回ltrb\n            3）将预测目标score通过softmax处理\n            Do scale and transform from xywh to ltrb\n            suppose input N x 4 x num_bbox | N x label_num x num_bbox\n\n            bboxes_in: [N, 4, 8732]是网络预测的xywh回归参数\n            scores_in: [N, label_num, 8732]是预测的每个default box的各目标概率\n        \"\"\"\n\n        # Returns a view of the original tensor with its dimensions permuted.\n        # [batch, 4, 8732] -> [batch, 8732, 4]\n        bboxes_in = bboxes_in.permute(0, 2, 1)\n        # [batch, label_num, 8732] -> [batch, 8732, label_num]\n        scores_in = scores_in.permute(0, 2, 1)\n        # print(bboxes_in.is_contiguous())\n\n        bboxes_in[:, :, :2] = self.scale_xy * bboxes_in[:, :, :2]   # 预测的x, y回归参数\n        bboxes_in[:, :, 2:] = self.scale_wh * 
bboxes_in[:, :, 2:]   # 预测的w, h回归参数\n\n        # 将预测的回归参数叠加到default box上得到最终的预测边界框\n        bboxes_in[:, :, :2] = bboxes_in[:, :, :2] * self.dboxes_xywh[:, :, 2:] + self.dboxes_xywh[:, :, :2]\n        bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp() * self.dboxes_xywh[:, :, 2:]\n\n        # transform format to ltrb\n        l = bboxes_in[:, :, 0] - 0.5 * bboxes_in[:, :, 2]\n        t = bboxes_in[:, :, 1] - 0.5 * bboxes_in[:, :, 3]\n        r = bboxes_in[:, :, 0] + 0.5 * bboxes_in[:, :, 2]\n        b = bboxes_in[:, :, 1] + 0.5 * bboxes_in[:, :, 3]\n\n        bboxes_in[:, :, 0] = l  # xmin\n        bboxes_in[:, :, 1] = t  # ymin\n        bboxes_in[:, :, 2] = r  # xmax\n        bboxes_in[:, :, 3] = b  # ymax\n\n        # scores_in: [batch, 8732, label_num]\n        return bboxes_in, F.softmax(scores_in, dim=-1)\n\n    def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):\n        # type: (Tensor, Tensor, float, int) -> Tuple[Tensor, Tensor, Tensor]\n        \"\"\"\n        decode:\n            input  : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems)\n            output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes)\n            criteria : IoU threshold of bboexes\n            max_output : maximum number of output bboxes\n        \"\"\"\n        device = bboxes_in.device\n        num_classes = scores_in.shape[-1]\n\n        # 对越界的bbox进行裁剪\n        bboxes_in = bboxes_in.clamp(min=0, max=1)\n\n        # [8732, 4] -> [8732, 21, 4]\n        bboxes_in = bboxes_in.repeat(1, num_classes).reshape(scores_in.shape[0], -1, 4)\n\n        # create labels for each prediction\n        labels = torch.arange(num_classes, device=device)\n        # [num_classes] -> [8732, num_classes]\n        labels = labels.view(1, -1).expand_as(scores_in)\n\n        # remove prediction with the background label\n        # 移除归为背景类别的概率信息\n        bboxes_in = bboxes_in[:, 1:, :]  # [8732, 21, 4] -> [8732, 20, 4]\n        scores_in = scores_in[:, 1:]  # [8732, 
21] -> [8732, 20]\n        labels = labels[:, 1:]  # [8732, 21] -> [8732, 20]\n\n        # batch everything, by making every class prediction be a separate instance\n        bboxes_in = bboxes_in.reshape(-1, 4)  # [8732, 20, 4] -> [8732x20, 4]\n        scores_in = scores_in.reshape(-1)  # [8732, 20] -> [8732x20]\n        labels = labels.reshape(-1)  # [8732, 20] -> [8732x20]\n\n        # remove low scoring boxes\n        # 移除低概率目标，self.scores_thresh=0.05\n        # inds = torch.nonzero(scores_in > 0.05).squeeze(1)\n        inds = torch.where(torch.gt(scores_in, 0.05))[0]\n        bboxes_in, scores_in, labels = bboxes_in[inds, :], scores_in[inds], labels[inds]\n\n        # remove empty boxes\n        ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]\n        keep = (ws >= 1 / 300) & (hs >= 1 / 300)\n        # keep = keep.nonzero().squeeze(1)\n        keep = torch.where(keep)[0]\n        bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]\n\n        # non-maximum suppression\n        keep = batched_nms(bboxes_in, scores_in, labels, iou_threshold=criteria)\n\n        # keep only topk scoring predictions\n        keep = keep[:num_output]\n        bboxes_out = bboxes_in[keep, :]\n        scores_out = scores_in[keep]\n        labels_out = labels[keep]\n\n        return bboxes_out, labels_out, scores_out\n\n    def forward(self, bboxes_in, scores_in):\n        # 通过预测的boxes回归参数得到最终预测坐标, 将预测目标score通过softmax处理\n        bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)\n\n        outputs = torch.jit.annotate(List[Tuple[Tensor, Tensor, Tensor]], [])\n        # 遍历一个batch中的每张image数据\n        # bboxes: [batch, 8732, 4]\n        for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):  # split_size, split_dim\n            # bbox: [1, 8732, 4]\n            bbox = bbox.squeeze(0)\n            prob = prob.squeeze(0)\n            outputs.append(self.decode_single_new(bbox, prob, self.criteria, self.max_output))\n     
   return outputs\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom my_dataset import VOCDataSet\nfrom src import SSD300, Backbone\nimport train_utils.train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir\n\n\ndef create_model(num_classes):\n    # https://download.pytorch.org/models/resnet50-19c8e357.pth\n    # pre_train_path = \"./src/resnet50.pth\"\n    backbone = Backbone(pretrain_path=None)\n    model = SSD300(backbone=backbone, num_classes=num_classes)\n\n    pre_ssd_path = \"./src/nvidia_ssdpyt_fp32.pt\"\n    pre_model_dict = torch.load(pre_ssd_path, map_location='cpu')\n    pre_weights_dict = pre_model_dict[\"model\"]\n\n    # 删除类别预测器权重，注意，回归预测器的权重可以重用，因为不涉及num_classes\n    del_conf_loc_dict = {}\n    for k, v in pre_weights_dict.items():\n        split_key = k.split(\".\")\n        if \"conf\" in split_key:\n            continue\n        del_conf_loc_dict.update({k: v})\n\n    missing_keys, unexpected_keys = model.load_state_dict(del_conf_loc_dict, strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # Data loading code\n    print(\"Loading data\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.SSDCropping(),\n                                     transforms.Resize(),\n                                     transforms.ColorJitter(),\n                                     transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.Normalization(),\n                       
              transforms.AssignGTtoDefaultBox()]),\n        \"val\": transforms.Compose([transforms.Resize(),\n                                   transforms.ToTensor(),\n                                   transforms.Normalization()])\n    }\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_data_set = VOCDataSet(VOC_root, \"2012\", data_transform[\"train\"], train_set='train.txt')\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_data_set = VOCDataSet(VOC_root, \"2012\", data_transform[\"val\"], train_set='val.txt')\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_data_set)\n        test_sampler = torch.utils.data.SequentialSampler(val_data_set)\n\n    if args.aspect_ratio_group_factor >= 0:\n        # 统计所有图像比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_data_set, k=args.aspect_ratio_group_factor)\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n    else:\n        train_batch_sampler = torch.utils.data.BatchSampler(\n            train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = torch.utils.data.DataLoader(\n        train_data_set, batch_sampler=train_batch_sampler, num_workers=args.workers,\n        collate_fn=train_data_set.collate_fn)\n\n    data_loader_test = torch.utils.data.DataLoader(\n        val_data_set, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        
collate_fn=train_data_set.collate_fn)\n\n    print(\"Creating model\")\n    model = create_model(num_classes=args.num_classes+1)\n    model.to(device)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(\n        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n\n    if args.test_only:\n        utils.evaluate(model, data_loader_test, device=device)\n        return\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader, device,\n                                              epoch, args.print_freq, warmup=True)\n        # only first process to save training info\n       
 if args.rank in [-1, 0]:\n            train_loss.append(mean_loss.item())\n            learning_rate.append(lr)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        coco_info = utils.evaluate(model, data_loader_test, device=device)\n\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n            val_map.append(coco_info[1])  # pascal mAP\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_on_master({\n                'model': model_without_ddp.state_dict(),\n                'optimizer': optimizer.state_dict(),\n                'lr_scheduler': lr_scheduler.state_dict(),\n                'args': args,\n                'epoch': epoch},\n                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 检测的目标类别个数，不包括背景\n    parser.add_argument('--num_classes', default=20, 
type=int, help='num_classes')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=8, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 学习率，这个需要根据gpu的数量以及batch_size进行设置0.005 / 8 * num_GPU\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.005 is the default value for training '\n                        'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.StepLR的参数\n    parser.add_argument('--lr-step-size', default=5, type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[7, 12], nargs='+', type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.3, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    
parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_ssd300.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom my_dataset import VOCDataSet\nfrom src import SSD300, Backbone\nimport train_utils.train_eval_utils as utils\nfrom train_utils import get_coco_api_from_dataset\n\n\ndef create_model(num_classes=21):\n    # https://download.pytorch.org/models/resnet50-19c8e357.pth\n    # pre_train_path = \"./src/resnet50.pth\"\n    backbone = Backbone()\n    model = SSD300(backbone=backbone, num_classes=num_classes)\n\n    # https://ngc.nvidia.com/catalog/models -> search ssd -> download FP32\n    pre_ssd_path = \"./src/nvidia_ssdpyt_fp32.pt\"\n    if os.path.exists(pre_ssd_path) is False:\n        raise FileNotFoundError(\"nvidia_ssdpyt_fp32.pt not find in {}\".format(pre_ssd_path))\n    pre_model_dict = torch.load(pre_ssd_path, map_location='cpu')\n    pre_weights_dict = pre_model_dict[\"model\"]\n\n    # 删除类别预测器权重，注意，回归预测器的权重可以重用，因为不涉及num_classes\n    del_conf_loc_dict = {}\n    for k, v in pre_weights_dict.items():\n        split_key = k.split(\".\")\n        if \"conf\" in split_key:\n            continue\n        del_conf_loc_dict.update({k: v})\n\n    missing_keys, unexpected_keys = model.load_state_dict(del_conf_loc_dict, strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    if not os.path.exists(\"save_weights\"):\n        os.mkdir(\"save_weights\")\n\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.SSDCropping(),\n                                     transforms.Resize(),\n                                     transforms.ColorJitter(),\n      
                               transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(),\n                                     transforms.Normalization(),\n                                     transforms.AssignGTtoDefaultBox()]),\n        \"val\": transforms.Compose([transforms.Resize(),\n                                   transforms.ToTensor(),\n                                   transforms.Normalization()])\n    }\n\n    VOC_root = parser_data.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> train.txt\n    train_dataset = VOCDataSet(VOC_root, \"2012\", data_transform['train'], train_set='train.txt')\n    # 注意训练时，batch_size必须大于1\n    batch_size = parser_data.batch_size\n    assert batch_size > 1, \"batch size must be greater than 1\"\n    # 防止最后一个batch_size=1，如果最后一个batch_size=1就舍去\n    drop_last = True if len(train_dataset) % batch_size == 1 else False\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                    batch_size=batch_size,\n                                                    shuffle=True,\n                                                    num_workers=nw,\n                                                    collate_fn=train_dataset.collate_fn,\n                                                    drop_last=drop_last)\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", data_transform['val'], train_set='val.txt')\n    val_data_loader = torch.utils.data.DataLoader(val_dataset,\n                                                  batch_size=batch_size,\n                    
                              shuffle=False,\n                                                  num_workers=nw,\n                                                  collate_fn=train_dataset.collate_fn)\n\n    model = create_model(num_classes=args.num_classes+1)\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=0.0005,\n                                momentum=0.9, weight_decay=0.0005)\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,\n                                                   step_size=5,\n                                                   gamma=0.3)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if parser_data.resume != \"\":\n        checkpoint = torch.load(parser_data.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        parser_data.start_epoch = checkpoint['epoch'] + 1\n        print(\"the training process from epoch{}...\".format(parser_data.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    # 提前加载验证集数据，以免每次验证时都要重新加载一次数据，节省时间\n    val_data = get_coco_api_from_dataset(val_data_loader.dataset)\n    for epoch in range(parser_data.start_epoch, parser_data.epochs):\n        mean_loss, lr = utils.train_one_epoch(model=model, optimizer=optimizer,\n                                              data_loader=train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        coco_info = utils.evaluate(model=model, data_loader=val_data_loader,\n                                   
device=device, data_set=val_data)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item()]] + [str(round(lr, 6))]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        torch.save(save_files, \"./save_weights/ssd300-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n    # inputs = torch.rand(size=(2, 3, 300, 300))\n    # output = model(inputs)\n    # print(output)\n\n\nif __name__ == '__main__':\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 检测的目标类别个数，不包括背景\n    parser.add_argument('--num_classes', default=20, type=int, help='num_classes')\n    # 训练数据集的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=15, type=int, metavar='N',\n      
                  help='number of total epochs to run')\n    # 训练的batch size\n    parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/__init__.py",
    "content": "from .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/coco_eval.py",
    "content": "import json\nimport copy\nfrom collections import defaultdict\n\nimport numpy as np\nimport torch\nimport torch._six\n\nfrom pycocotools.cocoeval import COCOeval\nfrom pycocotools.coco import COCO\nimport pycocotools.mask as mask_util\n\nfrom train_utils.distributed_utils import all_gather\n\n\nclass CocoEvaluator(object):\n    def __init__(self, coco_gt, iou_types):\n        assert isinstance(iou_types, (list, tuple))\n        coco_gt = copy.deepcopy(coco_gt)\n        self.coco_gt = coco_gt\n\n        self.iou_types = iou_types\n        self.coco_eval = {}\n        for iou_type in iou_types:\n            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)\n\n        self.img_ids = []\n        self.eval_imgs = {k: [] for k in iou_types}\n\n    def update(self, predictions):\n        img_ids = list(np.unique(list(predictions.keys())))\n        self.img_ids.extend(img_ids)\n\n        for iou_type in self.iou_types:\n            results = self.prepare(predictions, iou_type)\n            coco_dt = loadRes(self.coco_gt, results) if results else COCO()\n            coco_eval = self.coco_eval[iou_type]\n\n            coco_eval.cocoDt = coco_dt\n            coco_eval.params.imgIds = list(img_ids)\n            img_ids, eval_imgs = evaluate(coco_eval)\n\n            self.eval_imgs[iou_type].append(eval_imgs)\n\n    def synchronize_between_processes(self):\n        for iou_type in self.iou_types:\n            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)\n            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])\n\n    def accumulate(self):\n        for coco_eval in self.coco_eval.values():\n            coco_eval.accumulate()\n\n    def summarize(self):\n        for iou_type, coco_eval in self.coco_eval.items():\n            print(\"IoU metric: {}\".format(iou_type))\n            coco_eval.summarize()\n\n    def prepare(self, predictions, iou_type):\n        if iou_type == 
\"bbox\":\n            return self.prepare_for_coco_detection(predictions)\n        elif iou_type == \"segm\":\n            return self.prepare_for_coco_segmentation(predictions)\n        elif iou_type == \"keypoints\":\n            return self.prepare_for_coco_keypoint(predictions)\n        else:\n            raise ValueError(\"Unknown iou type {}\".format(iou_type))\n\n    def prepare_for_coco_detection(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            # xmin, ymin, xmax, ymax\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes)\n            boxes = boxes.tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"bbox\": box,\n                        \"score\": scores[k],\n                    }\n                    for k, box in enumerate(boxes)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_segmentation(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            scores = prediction[\"scores\"]\n            labels = prediction[\"labels\"]\n            masks = prediction[\"masks\"]\n\n            masks = masks > 0.5\n\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            rles = [\n                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                for mask in masks\n            ]\n            for rle in rles:\n                rle[\"counts\"] = 
rle[\"counts\"].decode(\"utf-8\")\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"segmentation\": rle,\n                        \"score\": scores[k],\n                    }\n                    for k, rle in enumerate(rles)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_keypoint(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n            keypoints = prediction[\"keypoints\"]\n            keypoints = keypoints.flatten(start_dim=1).tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        'keypoints': keypoint,\n                        \"score\": scores[k],\n                    }\n                    for k, keypoint in enumerate(keypoints)\n                ]\n            )\n        return coco_results\n\n\ndef convert_to_xywh(boxes):\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)\n\n\ndef merge(img_ids, eval_imgs):\n    all_img_ids = all_gather(img_ids)\n    all_eval_imgs = all_gather(eval_imgs)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_imgs = []\n    for p in all_eval_imgs:\n        merged_eval_imgs.append(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)\n\n    # keep only unique (and in 
sorted order) images\n    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)\n    merged_eval_imgs = merged_eval_imgs[..., idx]\n\n    return merged_img_ids, merged_eval_imgs\n\n\ndef create_common_coco_eval(coco_eval, img_ids, eval_imgs):\n    img_ids, eval_imgs = merge(img_ids, eval_imgs)\n    img_ids = list(img_ids)\n    eval_imgs = list(eval_imgs.flatten())\n\n    coco_eval.evalImgs = eval_imgs\n    coco_eval.params.imgIds = img_ids\n    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)\n\n\n#################################################################\n# From pycocotools, just removed the prints and fixed\n# a Python3 bug about unicode not defined\n#################################################################\n\n# Ideally, pycocotools wouldn't have hard-coded prints\n# so that we could avoid copy-pasting those two functions\n\ndef createIndex(self):\n    # create index\n    # print('creating index...')\n    anns, cats, imgs = {}, {}, {}\n    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)\n    if 'annotations' in self.dataset:\n        for ann in self.dataset['annotations']:\n            imgToAnns[ann['image_id']].append(ann)\n            anns[ann['id']] = ann\n\n    if 'images' in self.dataset:\n        for img in self.dataset['images']:\n            imgs[img['id']] = img\n\n    if 'categories' in self.dataset:\n        for cat in self.dataset['categories']:\n            cats[cat['id']] = cat\n\n    if 'annotations' in self.dataset and 'categories' in self.dataset:\n        for ann in self.dataset['annotations']:\n            catToImgs[ann['category_id']].append(ann['image_id'])\n\n    # print('index created!')\n\n    # create class members\n    self.anns = anns\n    self.imgToAnns = imgToAnns\n    self.catToImgs = catToImgs\n    self.imgs = imgs\n    self.cats = cats\n\n\nmaskUtils = mask_util\n\n\ndef loadRes(self, resFile):\n    \"\"\"\n    Load result file and return a result api object.\n    :param   resFile 
(str)     : file name of result file\n    :return: res (obj)         : result api object\n    \"\"\"\n    res = COCO()\n    res.dataset['images'] = [img for img in self.dataset['images']]\n\n    # print('Loading and preparing results...')\n    # tic = time.time()\n    if isinstance(resFile, torch._six.string_classes):\n        anns = json.load(open(resFile))\n    elif type(resFile) == np.ndarray:\n        anns = self.loadNumpyAnnotations(resFile)\n    else:\n        anns = resFile\n    assert type(anns) == list, 'results in not an array of objects'\n    annsImgIds = [ann['image_id'] for ann in anns]\n    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \\\n        'Results do not correspond to current coco set'\n    if 'caption' in anns[0]:\n        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])\n        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]\n        for id, ann in enumerate(anns):\n            ann['id'] = id + 1\n    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            bb = ann['bbox']\n            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]\n            if 'segmentation' not in ann:\n                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]\n            ann['area'] = bb[2] * bb[3]\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'segmentation' in anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            # now only support compressed RLE format as segmentation results\n            ann['area'] = maskUtils.area(ann['segmentation'])\n            if 'bbox' not in ann:\n                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])\n            ann['id'] = id + 1\n           
 ann['iscrowd'] = 0\n    elif 'keypoints' in anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            s = ann['keypoints']\n            x = s[0::3]\n            y = s[1::3]\n            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)\n            ann['area'] = (x2 - x1) * (y2 - y1)\n            ann['id'] = id + 1\n            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    # print('DONE (t={:0.2f}s)'.format(time.time()- tic))\n\n    res.dataset['annotations'] = anns\n    createIndex(res)\n    return res\n\n\ndef evaluate(self):\n    '''\n    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs\n    :return: None\n    '''\n    # tic = time.time()\n    # print('Running per image evaluation...')\n    p = self.params\n    # add backward compatibility if useSegm is specified in params\n    if p.useSegm is not None:\n        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'\n        print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType))\n    # print('Evaluate annotation type *{}*'.format(p.iouType))\n    p.imgIds = list(np.unique(p.imgIds))\n    if p.useCats:\n        p.catIds = list(np.unique(p.catIds))\n    p.maxDets = sorted(p.maxDets)\n    self.params = p\n\n    self._prepare()\n    # loop through images, area range, max detection number\n    catIds = p.catIds if p.useCats else [-1]\n\n    if p.iouType == 'segm' or p.iouType == 'bbox':\n        computeIoU = self.computeIoU\n    elif p.iouType == 'keypoints':\n        computeIoU = self.computeOks\n    self.ious = {\n        (imgId, catId): computeIoU(imgId, catId)\n        for imgId in p.imgIds\n        for catId in catIds}\n\n    evaluateImg = self.evaluateImg\n    maxDet = p.maxDets[-1]\n    evalImgs = [\n        evaluateImg(imgId, catId, areaRng, maxDet)\n        for catId in catIds\n        for areaRng in p.areaRng\n        for imgId in p.imgIds\n    ]\n    # this is NOT in the pycocotools code, but could be done outside\n    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))\n    self._paramsEval = copy.deepcopy(self.params)\n    # toc = time.time()\n    # print('DONE (t={:0.2f}s).'.format(toc-tic))\n    return p.imgIds, evalImgs\n\n#################################################################\n# end of straight copy from pycocotools, just removing the prints\n#################################################################\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/coco_utils.py",
    "content": "from tqdm import tqdm\n\nimport torch\nimport torchvision\nimport torch.utils.data\nfrom pycocotools.coco import COCO\n\n\ndef convert_to_coco_api(ds):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0\n    ann_id = 1\n    dataset = {'images': [], 'categories': [], 'annotations': []}\n    categories = set()\n    for img_idx in range(len(ds)):\n        # find better way to get target\n        targets = ds.coco_index(img_idx)\n        image_id = targets[\"image_id\"].item()\n        img_dict = {}\n        img_dict['id'] = image_id\n        # img_dict['height'] = img.shape[-2]\n        # img_dict['width'] = img.shape[-1]\n        img_dict['height'] = targets[\"height_width\"][0]\n        img_dict['width'] = targets[\"height_width\"][1]\n        dataset['images'].append(img_dict)\n\n        # xmin, ymin, xmax, ymax\n        bboxes = targets[\"boxes\"]\n\n        # (xmin, ymin, xmax, ymax) to (xmin, ymin, w, h)\n        bboxes[:, 2:] -= bboxes[:, :2]\n        # 将box的相对坐标信息（0-1）转为绝对值坐标\n        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * img_dict[\"width\"]\n        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * img_dict[\"height\"]\n        bboxes = bboxes.tolist()\n        labels = targets['labels'].tolist()\n        # 注意这里的boxes area也要进行转换，否则导致(small, medium, large)计算错误\n        areas = (targets['area'] * img_dict[\"width\"] * img_dict[\"height\"]).tolist()\n        iscrowd = targets['iscrowd'].tolist()\n        num_objs = len(bboxes)\n        for i in range(num_objs):\n            ann = {}\n            ann['image_id'] = image_id\n            ann['bbox'] = bboxes[i]\n            ann['category_id'] = labels[i]\n            categories.add(labels[i])\n            ann['area'] = areas[i]\n            ann['iscrowd'] = iscrowd[i]\n            ann['id'] = ann_id\n            dataset['annotations'].append(ann)\n            ann_id += 1\n    dataset['categories'] = [{'id': i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    
coco_ds.createIndex()\n    return coco_ds\n\n\ndef get_coco_api_from_dataset(dataset):\n    for _ in range(10):\n        if isinstance(dataset, torchvision.datasets.CocoDetection):\n            break\n        if isinstance(dataset, torch.utils.data.Subset):\n            dataset = dataset.dataset\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return dataset.coco\n    return convert_to_coco_api(dataset)\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    Run all_gather on arbitrary picklable data (not 
necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()\n    if world_size == 1:\n        return [data]\n\n    # serialized to a Tensor\n    buffer = pickle.dumps(data)\n    storage = torch.ByteStorage.from_buffer(buffer)\n    tensor = torch.ByteTensor(storage).to(\"cuda\")\n\n    # obtain Tensor size of each rank\n    local_size = torch.tensor([tensor.numel()], device=\"cuda\")\n    size_list = [torch.tensor([0], device=\"cuda\") for _ in range(world_size)]\n    dist.all_gather(size_list, local_size)\n    size_list = [int(size.item()) for size in size_list]\n    max_size = max(size_list)\n\n    # receiving Tensor from all ranks\n    # we pad the tensor because torch all_gather does not support\n    # gathering tensors of different shapes\n    tensor_list = []\n    for _ in size_list:\n        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=\"cuda\"))\n    if local_size != max_size:\n        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device=\"cuda\")\n        tensor = torch.cat((tensor, padding), dim=0)\n    dist.all_gather(tensor_list, tensor)\n\n    data_list = []\n    for size, tensor in zip(size_list, tensor_list):\n        buffer = tensor.cpu().numpy().tobytes()[:size]\n        data_list.append(pickle.loads(buffer))\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. 
Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n       
 end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         
eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef 
is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k+1等份\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/ssd/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nfrom train_utils import get_coco_api_from_dataset, CocoEvaluator\nimport train_utils.distributed_utils as utils\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 5.0 / 10000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        # batch inputs information\n        images = torch.stack(images, dim=0)\n\n        boxes = []\n        labels = []\n        img_id = []\n        for t in targets:\n            boxes.append(t['boxes'])\n            labels.append(t['labels'])\n            img_id.append(t[\"image_id\"])\n        targets = {\"boxes\": torch.stack(boxes, dim=0),\n                   \"labels\": torch.stack(labels, dim=0),\n                   \"image_id\": torch.as_tensor(img_id)}\n\n        images = images.to(device)\n\n        targets = {k: v.to(device) for k, v in targets.items()}\n        losses_dict = model(images, targets)\n        losses = losses_dict[\"total_losses\"]\n\n        # reduce losses over all GPUs for logging purpose\n        losses_dict_reduced = utils.reduce_dict(losses_dict)\n        losses_reduce = losses_dict_reduced[\"total_losses\"]\n\n        loss_value = losses_reduce.detach()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not 
math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(losses_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        losses.backward()\n        optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        # metric_logger.update(loss=losses, **loss_dict_reduced)\n        metric_logger.update(**losses_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device, data_set=None):\n\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    if data_set is None:\n        data_set = get_coco_api_from_dataset(data_loader.dataset)\n    iou_types = _get_iou_types(model)\n    coco_evaluator = CocoEvaluator(data_set, iou_types)\n\n    for images, targets in metric_logger.log_every(data_loader, 100, header):\n        images = torch.stack(images, dim=0).to(device)\n\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        #  list((bboxes_out, labels_out, scores_out), ...)\n        results = model(images, targets=None)\n        model_time = time.time() - model_time\n\n        outputs = []\n        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):\n            # 将box的相对坐标信息（0-1）转为绝对值坐标(xmin, ymin, xmax, ymax)\n            height_width = targets[index][\"height_width\"]\n            # 还原回原图尺度\n            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]\n            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]\n\n            info = {\"boxes\": bboxes_out.to(cpu_device),\n                    \"labels\": labels_out.to(cpu_device),\n                    \"scores\": 
scores_out.to(cpu_device)}\n            outputs.append(info)\n\n        res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n\n        evaluator_time = time.time()\n        coco_evaluator.update(res)\n        evaluator_time = time.time() - evaluator_time\n        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list\n\n    return coco_info\n\n\ndef _get_iou_types(model):\n    model_without_ddp = model\n    if isinstance(model, torch.nn.parallel.DistributedDataParallel):\n        model_without_ddp = model.module\n    iou_types = [\"bbox\"]\n    return iou_types\n"
  },
  {
    "path": "pytorch_object_detection/ssd/transforms.py",
    "content": "import random\n\nimport torch\nimport torchvision.transforms as t\nfrom torchvision.transforms import functional as F\n\nfrom src import dboxes300_coco, calc_iou_tensor, Encoder\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target=None):\n        for trans in self.transforms:\n            image, target = trans(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image).contiguous()\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机水平翻转图像以及bboxes,该方法应放在ToTensor后\"\"\"\n    def __init__(self, prob=0.5):\n        self.prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.prob:\n            # height, width = image.shape[-2:]\n            image = image.flip(-1)  # 水平翻转图片\n            bbox = target[\"boxes\"]\n            # bbox: xmin, ymin, xmax, ymax\n            # bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            bbox[:, [0, 2]] = 1.0 - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            target[\"boxes\"] = bbox\n        return image, target\n\n\n# This function is from https://github.com/chauhan-utk/ssd.DomainAdaptation.\nclass SSDCropping(object):\n    \"\"\"\n    根据原文，对图像进行裁剪,该方法应放在ToTensor前\n    Cropping for SSD, according to original paper\n    Choose between following 3 conditions:\n    1. Preserve the original image\n    2. Random crop minimum IoU is among 0.1, 0.3, 0.5, 0.7, 0.9\n    3. 
Random crop\n    Reference to https://github.com/chauhan-utk/src.DomainAdaptation\n    \"\"\"\n    def __init__(self):\n        self.sample_options = (\n            # Do nothing\n            None,\n            # min IoU, max IoU\n            (0.1, None),\n            (0.3, None),\n            (0.5, None),\n            (0.7, None),\n            (0.9, None),\n            # no IoU requirements\n            (None, None),\n        )\n        self.dboxes = dboxes300_coco()\n\n    def __call__(self, image, target):\n        # Ensure always return cropped image\n        while True:\n            mode = random.choice(self.sample_options)\n            if mode is None:  # 不做随机裁剪处理\n                return image, target\n\n            htot, wtot = target['height_width']\n\n            min_iou, max_iou = mode\n            min_iou = float('-inf') if min_iou is None else min_iou\n            max_iou = float('+inf') if max_iou is None else max_iou\n\n            # Implementation use 5 iteration to find possible candidate\n            for _ in range(5):\n                # 0.3*0.3 approx. 
0.1\n                w = random.uniform(0.3, 1.0)\n                h = random.uniform(0.3, 1.0)\n\n                if w/h < 0.5 or w/h > 2:  # 保证宽高比例在0.5-2之间\n                    continue\n\n                # left 0 ~ wtot - w, top 0 ~ htot - h\n                left = random.uniform(0, 1.0 - w)\n                top = random.uniform(0, 1.0 - h)\n\n                right = left + w\n                bottom = top + h\n\n                # boxes的坐标是在0-1之间的\n                bboxes = target[\"boxes\"]\n                ious = calc_iou_tensor(bboxes, torch.tensor([[left, top, right, bottom]]))\n\n                # tailor all the bboxes and return\n                # all(): Returns True if all elements in the tensor are True, False otherwise.\n                if not ((ious > min_iou) & (ious < max_iou)).all():\n                    continue\n\n                # discard any bboxes whose center not in the cropped image\n                xc = 0.5 * (bboxes[:, 0] + bboxes[:, 2])\n                yc = 0.5 * (bboxes[:, 1] + bboxes[:, 3])\n\n                # 查找所有的gt box的中心点有没有在采样patch中的\n                masks = (xc > left) & (xc < right) & (yc > top) & (yc < bottom)\n\n                # if no such boxes, continue searching again\n                # 如果所有的gt box的中心点都不在采样的patch中，则重新找\n                if not masks.any():\n                    continue\n\n                # 修改采样patch中的所有gt box的坐标（防止出现越界的情况）\n                bboxes[bboxes[:, 0] < left, 0] = left\n                bboxes[bboxes[:, 1] < top, 1] = top\n                bboxes[bboxes[:, 2] > right, 2] = right\n                bboxes[bboxes[:, 3] > bottom, 3] = bottom\n\n                # 虑除不在采样patch中的gt box\n                bboxes = bboxes[masks, :]\n                # 获取在采样patch中的gt box的标签\n                labels = target['labels']\n                labels = labels[masks]\n\n                # 裁剪patch\n                left_idx = int(left * wtot)\n                top_idx = int(top * htot)\n                right_idx = int(right * wtot)\n 
               bottom_idx = int(bottom * htot)\n                image = image.crop((left_idx, top_idx, right_idx, bottom_idx))\n\n                # 调整裁剪后的bboxes坐标信息\n                bboxes[:, 0] = (bboxes[:, 0] - left) / w\n                bboxes[:, 1] = (bboxes[:, 1] - top) / h\n                bboxes[:, 2] = (bboxes[:, 2] - left) / w\n                bboxes[:, 3] = (bboxes[:, 3] - top) / h\n\n                # 更新crop后的gt box坐标信息以及标签信息\n                target['boxes'] = bboxes\n                target['labels'] = labels\n\n                return image, target\n\n\nclass Resize(object):\n    \"\"\"对图像进行resize处理,该方法应放在ToTensor前\"\"\"\n    def __init__(self, size=(300, 300)):\n        self.resize = t.Resize(size)\n\n    def __call__(self, image, target):\n        image = self.resize(image)\n        return image, target\n\n\nclass ColorJitter(object):\n    \"\"\"对图像颜色信息进行随机调整,该方法应放在ToTensor前\"\"\"\n    def __init__(self, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05):\n        self.trans = t.ColorJitter(brightness, contrast, saturation, hue)\n\n    def __call__(self, image, target):\n        image = self.trans(image)\n        return image, target\n\n\nclass Normalization(object):\n    \"\"\"对图像标准化处理,该方法应放在ToTensor后\"\"\"\n    def __init__(self, mean=None, std=None):\n        if mean is None:\n            mean = [0.485, 0.456, 0.406]\n        if std is None:\n            std = [0.229, 0.224, 0.225]\n        self.normalize = t.Normalize(mean=mean, std=std)\n\n    def __call__(self, image, target):\n        image = self.normalize(image)\n        return image, target\n\n\nclass AssignGTtoDefaultBox(object):\n    \"\"\"将DefaultBox与GT进行匹配\"\"\"\n    def __init__(self):\n        self.default_box = dboxes300_coco()\n        self.encoder = Encoder(self.default_box)\n\n    def __call__(self, image, target):\n        boxes = target['boxes']\n        labels = target[\"labels\"]\n        # bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)\n        bboxes_out, 
labels_out = self.encoder.encode(boxes, labels)\n        target['boxes'] = bboxes_out\n        target['labels'] = labels_out\n\n        return image, target\n"
  },
  {
    "path": "pytorch_object_detection/ssd/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nfrom tqdm import tqdm\nimport numpy as np\n\nimport transforms\nfrom src import Backbone, SSD300\nfrom my_dataset import VOCDataSet\nfrom train_utils import get_coco_api_from_dataset, CocoEvaluator\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            mean_s = -1\n        else:\n            mean_s = np.mean(s[s 
> -1])\n\n        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([transforms.Resize(),\n                                   transforms.ToTensor(),\n                                   transforms.Normalization()])\n    }\n\n    # read class_indict\n    label_json_path = './pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        
class_dict = json.load(f)\n\n    category_index = {v: k for k, v in class_dict.items()}\n\n    VOC_root = parser_data.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Main -> val.txt\n    val_dataset = VOCDataSet(VOC_root, \"2012\", transforms=data_transform[\"val\"], train_set=\"val.txt\")\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     num_workers=nw,\n                                                     pin_memory=True,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    backbone = Backbone()\n    model = SSD300(backbone=backbone, num_classes=parser_data.num_classes + 1)\n\n    # 载入你自己训练好的模型权重\n    weights_path = parser_data.weights\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    # print(model)\n\n    model.to(device)\n\n    # evaluate on the test dataset\n    coco = get_coco_api_from_dataset(val_dataset)\n    iou_types = [\"bbox\"]\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n    cpu_device = torch.device(\"cpu\")\n\n    
model.eval()\n    with torch.no_grad():\n        for images, targets in tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            images = torch.stack(images, dim=0).to(device)\n\n            # inference\n            results = model(images)\n\n            outputs = []\n            for index, (bboxes_out, labels_out, scores_out) in enumerate(results):\n                # 将box的相对坐标信息（0-1）转为绝对值坐标(xmin, ymin, xmax, ymax)\n                height_width = targets[index][\"height_width\"]\n                # 还原回原图尺度\n                bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]\n                bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]\n\n                info = {\"boxes\": bboxes_out.to(cpu_device),\n                        \"labels\": labels_out.to(cpu_device),\n                        \"scores\": scores_out.to(cpu_device)}\n                outputs.append(info)\n\n            res = {target[\"image_id\"].item(): output for target, output in zip(targets, outputs)}\n            coco_evaluator.update(res)\n\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_eval = coco_evaluator.coco_eval[\"bbox\"]\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(coco_eval)\n\n    # calculate voc info for every classes(IoU=0.5)\n    voc_map_info_list = []\n    for i in range(len(category_index)):\n        stats, _ = summarize(coco_eval, catId=i)\n        voc_map_info_list.append(\" {:15}: {}\".format(category_index[i + 1], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(\"record_mAP.txt\", \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                
        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n\n    # 检测目标类别数\n    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')\n\n    # 数据集的根目录(VOCdevkit根目录)\n    parser.add_argument('--data-path', default='/data/', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights', default='./save_weights/model.pth', type=str, help='training weights')\n\n    # batch size\n    parser.add_argument('--batch_size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/README.md",
    "content": "# 训练COCO2017数据集\n\n## 该项目参考自pytorch官方torchvision模块中的源码(使用pycocotools处略有不同)\n* https://github.com/pytorch/vision/tree/master/references/detection\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10.0\n* pycocotools(Linux:```pip install pycocotools```; Windows:```pip install pycocotools-windows```(不需要额外安装vs))\n* Ubuntu或Centos(不建议Windows)\n* 最好使用GPU训练\n* 详细环境配置见```requirements.txt```\n\n## 文件结构：\n```\n  ├── backbone: 特征提取网络，可以根据自己的要求选择，这里是以VGG16为例\n  ├── network_files: Faster R-CNN网络（包括Fast R-CNN以及RPN等模块）\n  ├── train_utils: 训练验证相关模块（包括pycocotools）\n  ├── my_dataset.py: 自定义dataset用于读取COCO2017数据集\n  ├── train.py: 以resnet50作为backbone进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的COCO指标，并生成record_mAP.txt文件\n  └── transforms.py: 数据预处理（随机水平翻转图像以及bboxes、将PIL图像转为Tensor）\n```\n\n## 预训练权重下载地址（下载后放入项目根目录）：\n* Resnet50 https://download.pytorch.org/models/resnet50-19c8e357.pth\n* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是`resnet50.pth`文件，\n  不是`resnet50-19c8e357.pth`\n \n \n## 数据集，本例程使用的是COCO2017数据集\n* COCO官网地址：https://cocodataset.org/\n* 对数据集不了解的可以看下我写的博文：https://blog.csdn.net/qq_37541097/article/details/113247318\n* 这里以下载coco2017数据集为例，主要下载三个文件：\n    * `2017 Train images [118K/18GB]`：训练过程中使用到的所有图像文件\n    * `2017 Val images [5K/1GB]`：验证过程中使用到的所有图像文件\n    * `2017 Train/Val annotations [241MB]`：对应训练集和验证集的标注json文件\n* 都解压到`coco2017`文件夹下，可得到如下文件结构：\n```\n├── coco2017: 数据集根目录\n     ├── train2017: 所有训练图像文件夹(118287张)\n     ├── val2017: 所有验证图像文件夹(5000张)\n     └── annotations: 对应标注文件夹\n              ├── instances_train2017.json: 对应目标检测、分割任务的训练集标注文件\n              ├── instances_val2017.json: 对应目标检测、分割任务的验证集标注文件\n              ├── captions_train2017.json: 对应图像描述的训练集标注文件\n              ├── captions_val2017.json: 对应图像描述的验证集标注文件\n              ├── person_keypoints_train2017.json: 对应人体关键点检测的训练集标注文件\n              └── person_keypoints_val2017.json: 对应人体关键点检测的验证集标注文件\n```\n\n## 训练方法\n* 确保提前准备好数据集\n* 
确保提前下载好对应预训练模型权重\n* 若要使用单GPU训练直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`\n\n## 注意事项\n* 在使用训练脚本时，注意要将`--data-path`设置为自己存放`coco2017`文件夹所在的**根目录**\n* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。\n* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights-path`即可，其他代码尽量不要改动\n\n## 本项目训练得到的权重(Faster R-CNN + Resnet50)\n* 链接: https://pan.baidu.com/s/1iF-Yl_9TkFFeAy-JysfGSw  密码: d2d8\n* COCO2017验证集mAP：\n```\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.277\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.453\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.290\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.126\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.308\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.378\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.243\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.358\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.366\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.169\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.512\n```\n\n## 如果对Faster RCNN原理不是很理解可参考我的bilibili\n* https://b23.tv/sXcBSP\n\n## Faster RCNN框架图\n![Faster R-CNN](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing/raw/master/pytorch_object_detection/faster_rcnn/fasterRCNN.png) \n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/__init__.py",
    "content": "from .resnet50_fpn_model import resnet50_fpn_backbone\nfrom .mobilenetv2_model import MobileNetV2\nfrom .vgg_model import vgg\nfrom .resnet import *\nfrom .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/feature_pyramid_network.py",
    "content": "from collections import OrderedDict\n\nimport torch.nn as nn\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom torch.jit.annotations import Tuple, List, Dict\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n    Arguments:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model, return_layers):\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n        layers = OrderedDict()\n\n        # 遍历模型子模块按顺序存入有序字典\n        # 只保存layer4及其之前的结构，舍去之后不用的结构\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super().__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def 
forward(self, x):\n        out = OrderedDict()\n        # 依次遍历模型的所有子模块，并进行正向传播，\n        # 收集layer1, layer2, layer3, layer4的输出\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass BackboneWithFPN(nn.Module):\n    \"\"\"\n    Adds a FPN on top of a model.\n    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to\n    extract a submodel that returns the feature maps specified in return_layers.\n    The same limitations of IntermediateLayerGetter apply here.\n    Arguments:\n        backbone (nn.Module)\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n        in_channels_list (List[int]): number of channels for each feature map\n            that is returned, in the order they are present in the OrderedDict\n        out_channels (int): number of channels in the FPN.\n        extra_blocks: ExtraFPNBlock\n    Attributes:\n        out_channels (int): the number of channels in the FPN\n    \"\"\"\n\n    def __init__(self,\n                 backbone: nn.Module,\n                 return_layers=None,\n                 in_channels_list=None,\n                 out_channels=256,\n                 extra_blocks=None,\n                 re_getter=True):\n        super().__init__()\n\n        if extra_blocks is None:\n            extra_blocks = LastLevelMaxPool()\n\n        if re_getter:\n            assert return_layers is not None\n            self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)\n        else:\n            self.body = backbone\n\n        self.fpn = FeaturePyramidNetwork(\n            
in_channels_list=in_channels_list,\n            out_channels=out_channels,\n            extra_blocks=extra_blocks,\n        )\n\n        self.out_channels = out_channels\n\n    def forward(self, x):\n        x = self.body(x)\n        x = self.fpn(x)\n        return x\n\n\nclass FeaturePyramidNetwork(nn.Module):\n    \"\"\"\n    Module that adds a FPN on top of a set of feature maps. This is based on\n    `\"Feature Pyramid Network for Object Detection\" <https://arxiv.org/abs/1612.03144>`_.\n    The feature maps are currently supposed to be in increasing depth\n    order.\n    The input to the model is expected to be an OrderedDict[Tensor], containing\n    the feature maps on top of which the FPN will be added.\n    Arguments:\n        in_channels_list (list[int]): number of channels for each feature map that\n            is passed to the module\n        out_channels (int): number of channels of the FPN representation\n        extra_blocks (ExtraFPNBlock or None): if provided, extra operations will\n            be performed. 
It is expected to take the fpn features, the original\n            features and the names of the original features as input, and returns\n            a new list of feature maps and their corresponding names\n    \"\"\"\n\n    def __init__(self, in_channels_list, out_channels, extra_blocks=None):\n        super().__init__()\n        # 用来调整resnet特征矩阵(layer1,2,3,4)的channel（kernel_size=1）\n        self.inner_blocks = nn.ModuleList()\n        # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵\n        self.layer_blocks = nn.ModuleList()\n        for in_channels in in_channels_list:\n            if in_channels == 0:\n                continue\n            inner_block_module = nn.Conv2d(in_channels, out_channels, 1)\n            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)\n            self.inner_blocks.append(inner_block_module)\n            self.layer_blocks.append(layer_block_module)\n\n        # initialize parameters now to avoid modifying the initialization of top_blocks\n        for m in self.children():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_uniform_(m.weight, a=1)\n                nn.init.constant_(m.bias, 0)\n\n        self.extra_blocks = extra_blocks\n\n    def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.inner_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.inner_blocks)\n        if idx < 0:\n            idx += num_blocks\n        i = 0\n        out = x\n        for module in self.inner_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor:\n        \"\"\"\n        This is equivalent to self.layer_blocks[idx](x),\n        but torchscript doesn't support this yet\n        \"\"\"\n        num_blocks = len(self.layer_blocks)\n        if idx < 0:\n          
  idx += num_blocks\n        i = 0\n        out = x\n        for module in self.layer_blocks:\n            if i == idx:\n                out = module(x)\n            i += 1\n        return out\n\n    def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]:\n        \"\"\"\n        Computes the FPN for a set of feature maps.\n        Arguments:\n            x (OrderedDict[Tensor]): feature maps for each feature level.\n        Returns:\n            results (OrderedDict[Tensor]): feature maps after FPN layers.\n                They are ordered from highest resolution first.\n        \"\"\"\n        # unpack OrderedDict into two lists for easier handling\n        names = list(x.keys())\n        x = list(x.values())\n\n        # 将resnet layer4的channel调整到指定的out_channels\n        # last_inner = self.inner_blocks[-1](x[-1])\n        last_inner = self.get_result_from_inner_blocks(x[-1], -1)\n        # result中保存着每个预测特征层\n        results = []\n        # 将layer4调整channel后的特征矩阵，通过3x3卷积后得到对应的预测特征矩阵\n        # results.append(self.layer_blocks[-1](last_inner))\n        results.append(self.get_result_from_layer_blocks(last_inner, -1))\n\n        for idx in range(len(x) - 2, -1, -1):\n            inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)\n            feat_shape = inner_lateral.shape[-2:]\n            inner_top_down = F.interpolate(last_inner, size=feat_shape, mode=\"nearest\")\n            last_inner = inner_lateral + inner_top_down\n            results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))\n\n        # 在layer4对应的预测特征层基础上生成预测特征矩阵5\n        if self.extra_blocks is not None:\n            results, names = self.extra_blocks(results, x, names)\n\n        # make it back an OrderedDict\n        out = OrderedDict([(k, v) for k, v in zip(names, results)])\n\n        return out\n\n\nclass LastLevelMaxPool(torch.nn.Module):\n    \"\"\"\n    Applies a max_pool2d on top of the last feature map\n    \"\"\"\n\n    def forward(self, x: 
List[Tensor], y: List[Tensor], names: List[str]) -> Tuple[List[Tensor], List[str]]:\n        names.append(\"pool\")\n        x.append(F.max_pool2d(x[-1], 1, 2, 0))\n        return x, names\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/mobilenetv2_model.py",
    "content": "from torch import nn\nimport torch\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(nn.Sequential):\n    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1, norm_layer=None):\n        padding = (kernel_size - 1) // 2\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        super(ConvBNReLU, self).__init__(\n            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),\n            norm_layer(out_channel),\n            nn.ReLU6(inplace=True)\n        )\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio, norm_layer=None):\n        super(InvertedResidual, self).__init__()\n        hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        layers = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1, norm_layer=norm_layer))\n        layers.extend([\n            # 3x3 depthwise conv\n            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel, norm_layer=norm_layer),\n            # 1x1 pointwise conv(linear)\n            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, 
bias=False),\n            norm_layer(out_channel),\n        ])\n\n        self.conv = nn.Sequential(*layers)\n\n    def forward(self, x):\n        if self.use_shortcut:\n            return x + self.conv(x)\n        else:\n            return self.conv(x)\n\n\nclass MobileNetV2(nn.Module):\n    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8, weights_path=None, norm_layer=None):\n        super(MobileNetV2, self).__init__()\n        block = InvertedResidual\n        input_channel = _make_divisible(32 * alpha, round_nearest)\n        last_channel = _make_divisible(1280 * alpha, round_nearest)\n\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        inverted_residual_setting = [\n            # t, c, n, s\n            [1, 16, 1, 1],\n            [6, 24, 2, 2],\n            [6, 32, 3, 2],\n            [6, 64, 4, 2],\n            [6, 96, 3, 1],\n            [6, 160, 3, 2],\n            [6, 320, 1, 1],\n        ]\n\n        features = []\n        # conv1 layer\n        features.append(ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer))\n        # building inverted residual blocks\n        for t, c, n, s in inverted_residual_setting:\n            output_channel = _make_divisible(c * alpha, round_nearest)\n            for i in range(n):\n                stride = s if i == 0 else 1\n                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))\n                input_channel = output_channel\n        # building last several layers\n        features.append(ConvBNReLU(input_channel, last_channel, 1, norm_layer=norm_layer))\n        # combine feature layers\n        self.features = nn.Sequential(*features)\n\n        # building classifier\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.classifier = nn.Sequential(\n            nn.Dropout(0.2),\n            nn.Linear(last_channel, num_classes)\n        )\n\n        if weights_path is None:\n     
       # weight initialization\n            for m in self.modules():\n                if isinstance(m, nn.Conv2d):\n                    nn.init.kaiming_normal_(m.weight, mode='fan_out')\n                    if m.bias is not None:\n                        nn.init.zeros_(m.bias)\n                elif isinstance(m, nn.BatchNorm2d):\n                    nn.init.ones_(m.weight)\n                    nn.init.zeros_(m.bias)\n                elif isinstance(m, nn.Linear):\n                    nn.init.normal_(m.weight, 0, 0.01)\n                    nn.init.zeros_(m.bias)\n        else:\n            self.load_state_dict(torch.load(weights_path))\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/resnet.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):\n        super(BasicBlock, self).__init__()\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(out_channel)\n        self.relu = nn.ReLU()\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(out_channel)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    \"\"\"\n    注意：原论文中，在虚线残差结构的主分支上，第一个1x1卷积层的步距是2，第二个3x3卷积层步距是1。\n    但在pytorch官方实现过程中是第一个1x1卷积层的步距是1，第二个3x3卷积层步距是2，\n    这么做的好处是能够在top1上提升大概0.5%的准确率。\n    可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch\n    \"\"\"\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None,\n                 groups=1, width_per_group=64):\n        super(Bottleneck, self).__init__()\n\n        width = int(out_channel * (width_per_group / 64.)) * groups\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = nn.BatchNorm2d(width)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, 
groups=groups,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = nn.BatchNorm2d(width)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel*self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self,\n                 block,\n                 blocks_num,\n                 num_classes=1000,\n                 include_top=True,\n                 groups=1,\n                 width_per_group=64):\n        super(ResNet, self).__init__()\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.groups = groups\n        self.width_per_group = width_per_group\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, 
blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel,\n                            channel,\n                            downsample=downsample,\n                            stride=stride,\n                            groups=self.groups,\n                            width_per_group=self.width_per_group))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel,\n                                channel,\n                                groups=self.groups,\n                                width_per_group=self.width_per_group))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet34-333f7ec4.pth\n    return ResNet(BasicBlock, [3, 4, 6, 3], 
num_classes=num_classes, include_top=include_top)\n\n\ndef resnet50(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet50-19c8e357.pth\n    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth\n    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)\n\n\ndef resnext50_32x4d(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth\n    groups = 32\n    width_per_group = 4\n    return ResNet(Bottleneck, [3, 4, 6, 3],\n                  num_classes=num_classes,\n                  include_top=include_top,\n                  groups=groups,\n                  width_per_group=width_per_group)\n\n\ndef resnext101_32x8d(num_classes=1000, include_top=True):\n    # https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth\n    groups = 32\n    width_per_group = 8\n    return ResNet(Bottleneck, [3, 4, 23, 3],\n                  num_classes=num_classes,\n                  include_top=include_top,\n                  groups=groups,\n                  width_per_group=width_per_group)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/resnet50_fpn_model.py",
    "content": "import os\n\nimport torch\nimport torch.nn as nn\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nfrom .feature_pyramid_network import BackboneWithFPN, LastLevelMaxPool\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n\n        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,\n                               kernel_size=1, stride=1, bias=False)  # squeeze channels\n        self.bn1 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,\n                               kernel_size=3, stride=stride, bias=False, padding=1)\n        self.bn2 = norm_layer(out_channel)\n        # -----------------------------------------\n        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,\n                               kernel_size=1, stride=1, bias=False)  # unsqueeze channels\n        self.bn3 = norm_layer(out_channel * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n    def forward(self, x):\n        identity = x\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):\n        super().__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n  
      self._norm_layer = norm_layer\n\n        self.include_top = include_top\n        self.in_channel = 64\n\n        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,\n                               padding=3, bias=False)\n        self.bn1 = norm_layer(self.in_channel)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, blocks_num[0])\n        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)\n        if self.include_top:\n            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)\n            self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n\n    def _make_layer(self, block, channel, block_num, stride=1):\n        norm_layer = self._norm_layer\n        downsample = None\n        if stride != 1 or self.in_channel != channel * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),\n                norm_layer(channel * block.expansion))\n\n        layers = []\n        layers.append(block(self.in_channel, channel, downsample=downsample,\n                            stride=stride, norm_layer=norm_layer))\n        self.in_channel = channel * block.expansion\n\n        for _ in range(1, block_num):\n            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        
x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = torch.flatten(x, 1)\n            x = self.fc(x)\n\n        return x\n\n\ndef overwrite_eps(model, eps):\n    \"\"\"\n    This method overwrites the default eps values of all the\n    FrozenBatchNorm2d layers of the model with the provided value.\n    This is necessary to address the BC-breaking change introduced\n    by the bug-fix at pytorch/vision#2933. The overwrite is applied\n    only when the pretrained weights are loaded to maintain compatibility\n    with previous versions.\n\n    Args:\n        model (nn.Module): The model on which we perform the overwrite.\n        eps (float): The new value of eps.\n    \"\"\"\n    for module in model.modules():\n        if isinstance(module, FrozenBatchNorm2d):\n            module.eps = eps\n\n\ndef resnet50_fpn_backbone(pretrain_path=\"\",\n                          norm_layer=FrozenBatchNorm2d,  # FrozenBatchNorm2d的功能与BatchNorm2d类似，但参数无法更新\n                          trainable_layers=3,\n                          returned_layers=None,\n                          extra_blocks=None):\n    \"\"\"\n    搭建resnet50_fpn——backbone\n    Args:\n        pretrain_path: resnet50的预训练权重，如果不使用就默认为空\n        norm_layer: 官方默认的是FrozenBatchNorm2d，即不会更新参数的bn层(因为如果batch_size设置的很小会导致效果更差，还不如不用bn层)\n                    如果自己的GPU显存很大可以设置很大的batch_size，那么自己可以传入正常的BatchNorm2d层\n                    (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)\n        trainable_layers: 指定训练哪些层结构\n        returned_layers: 指定哪些层的输出需要返回\n        extra_blocks: 在输出的特征层基础上额外添加的层结构\n\n    Returns:\n\n    \"\"\"\n    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],\n                             include_top=False,\n                             norm_layer=norm_layer)\n\n    if isinstance(norm_layer, FrozenBatchNorm2d):\n        overwrite_eps(resnet_backbone, 0.0)\n\n    
if pretrain_path != \"\":\n        assert os.path.exists(pretrain_path), \"{} is not exist.\".format(pretrain_path)\n        # 载入预训练权重\n        print(resnet_backbone.load_state_dict(torch.load(pretrain_path), strict=False))\n\n    # select layers that wont be frozen\n    assert 0 <= trainable_layers <= 5\n    layers_to_train = ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers]\n\n    # 如果要训练所有层结构的话，不要忘了conv1后还有一个bn1\n    if trainable_layers == 5:\n        layers_to_train.append(\"bn1\")\n\n    # freeze layers\n    for name, parameter in resnet_backbone.named_parameters():\n        # 只训练不在layers_to_train列表中的层结构\n        if all([not name.startswith(layer) for layer in layers_to_train]):\n            parameter.requires_grad_(False)\n\n    if extra_blocks is None:\n        extra_blocks = LastLevelMaxPool()\n\n    if returned_layers is None:\n        returned_layers = [1, 2, 3, 4]\n    # 返回的特征层个数肯定大于0小于5\n    assert min(returned_layers) > 0 and max(returned_layers) < 5\n\n    # return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}\n    return_layers = {f'layer{k}': str(v) for v, k in enumerate(returned_layers)}\n\n    # in_channel 为layer4的输出特征矩阵channel = 2048\n    in_channels_stage2 = resnet_backbone.in_channel // 8  # 256\n    # 记录resnet50提供给fpn的每个特征层channel\n    in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]\n    # 通过fpn后得到的每个特征层的channel\n    out_channels = 256\n    return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/backbone/vgg_model.py",
    "content": "import torch.nn as nn\nimport torch\n\n\nclass VGG(nn.Module):\n    def __init__(self, features, class_num=1000, init_weights=False, weights_path=None):\n        super(VGG, self).__init__()\n        self.features = features\n        self.classifier = nn.Sequential(\n            nn.Linear(512*7*7, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, 4096),\n            nn.ReLU(True),\n            nn.Dropout(p=0.5),\n            nn.Linear(4096, class_num)\n        )\n        if init_weights and weights_path is None:\n            self._initialize_weights()\n\n        if weights_path is not None:\n            self.load_state_dict(torch.load(weights_path))\n\n    def forward(self, x):\n        # N x 3 x 224 x 224\n        x = self.features(x)\n        # N x 512 x 7 x 7\n        x = torch.flatten(x, start_dim=1)\n        # N x 512*7*7\n        x = self.classifier(x)\n        return x\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n                nn.init.xavier_uniform_(m.weight)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.xavier_uniform_(m.weight)\n                # nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.constant_(m.bias, 0)\n\n\ndef make_features(cfg: list):\n    layers = []\n    in_channels = 3\n    for v in cfg:\n        if v == \"M\":\n            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]\n        else:\n            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)\n            layers += [conv2d, nn.ReLU(True)]\n            in_channels = v\n    return nn.Sequential(*layers)\n\n\ncfgs = {\n    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg13': [64, 64, 'M', 128, 
128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n}\n\n\ndef vgg(model_name=\"vgg16\", weights_path=None):\n    assert model_name in cfgs, \"Warning: model number {} not in cfgs dict!\".format(model_name)\n    cfg = cfgs[model_name]\n\n    model = VGG(make_features(cfg), weights_path=weights_path)\n    return model\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/change_backbone_with_fpn.py",
    "content": "import os\nimport datetime\n\nimport torch\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom my_dataset import CocoDetection\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom train_utils import train_eval_utils as utils\nfrom backbone import BackboneWithFPN, LastLevelMaxPool\n\n\ndef create_model(num_classes):\n    import torchvision\n    from torchvision.models.feature_extraction import create_feature_extractor\n\n    # --- mobilenet_v3_large fpn backbone --- #\n    backbone = torchvision.models.mobilenet_v3_large(pretrained=True)\n    # print(backbone)\n    return_layers = {\"features.6\": \"0\",   # stride 8\n                     \"features.12\": \"1\",  # stride 16\n                     \"features.16\": \"2\"}  # stride 32\n    # 提供给fpn的每个特征层channel\n    in_channels_list = [40, 112, 960]\n    new_backbone = create_feature_extractor(backbone, return_layers)\n    # img = torch.randn(1, 3, 224, 224)\n    # outputs = new_backbone(img)\n    # [print(f\"{k} shape: {v.shape}\") for k, v in outputs.items()]\n\n    # --- efficientnet_b0 fpn backbone --- #\n    # backbone = torchvision.models.efficientnet_b0(pretrained=True)\n    # # print(backbone)\n    # return_layers = {\"features.3\": \"0\",  # stride 8\n    #                  \"features.4\": \"1\",  # stride 16\n    #                  \"features.8\": \"2\"}  # stride 32\n    # # 提供给fpn的每个特征层channel\n    # in_channels_list = [40, 80, 1280]\n    # new_backbone = create_feature_extractor(backbone, return_layers)\n    # # img = torch.randn(1, 3, 224, 224)\n    # # outputs = new_backbone(img)\n    # # [print(f\"{k} shape: {v.shape}\") for k, v in outputs.items()]\n\n    backbone_with_fpn = BackboneWithFPN(new_backbone,\n                                        return_layers=return_layers,\n                                        in_channels_list=in_channels_list,\n                                        out_channels=256,\n                        
                extra_blocks=LastLevelMaxPool(),\n                                        re_getter=False)\n\n    anchor_sizes = ((64,), (128,), (256,), (512,))\n    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n    anchor_generator = AnchorsGenerator(sizes=anchor_sizes,\n                                        aspect_ratios=aspect_ratios)\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2'],  # 在哪些特征层上进行RoIAlign pooling\n                                                    output_size=[7, 7],  # RoIAlign pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone_with_fpn,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    COCO_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> instances_train2017.json\n    train_dataset = CocoDetection(COCO_root, \"train\", data_transform[\"train\"])\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        
train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # coco2017 -> annotations -> instances_val2017.json\n    val_dataset = CocoDetection(COCO_root, \"val\", data_transform[\"val\"])\n    val_data_set_loader = torch.utils.data.DataLoader(val_dataset,\n                                                      batch_size=1,\n                                                      shuffle=False,\n                                                      pin_memory=True,\n                                                      num_workers=nw,\n                                                      collate_fn=val_dataset.collate_fn)\n\n    # create model num_classes equal background + classes\n    model = 
create_model(num_classes=args.num_classes + 1)\n    # print(model)\n\n    model.to(device)\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params,\n                                lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,\n                                                        milestones=args.lr_steps,\n                                                        gamma=args.lr_gamma)\n\n    # 如果指定了上次训练保存的权重文件地址，则接着上次结果接着训练\n    if args.resume != \"\":\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n        print(\"the training process from epoch{}...\".format(args.start_epoch))\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 10 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device=device, epoch=epoch,\n                                              print_freq=50, warmup=True,\n                                              scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, 
val_data_set_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, \"./save_weights/model-{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=26, type=int, metavar='N',\n                        
help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练的batch size\n    parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/coco91_indices.json",
    "content": "{\n    \"1\": \"person\",\n    \"2\": \"bicycle\",\n    \"3\": \"car\",\n    \"4\": \"motorcycle\",\n    \"5\": \"airplane\",\n    \"6\": \"bus\",\n    \"7\": \"train\",\n    \"8\": \"truck\",\n    \"9\": \"boat\",\n    \"10\": \"traffic light\",\n    \"11\": \"fire hydrant\",\n    \"12\": \"N/A\",\n    \"13\": \"stop sign\",\n    \"14\": \"parking meter\",\n    \"15\": \"bench\",\n    \"16\": \"bird\",\n    \"17\": \"cat\",\n    \"18\": \"dog\",\n    \"19\": \"horse\",\n    \"20\": \"sheep\",\n    \"21\": \"cow\",\n    \"22\": \"elephant\",\n    \"23\": \"bear\",\n    \"24\": \"zebra\",\n    \"25\": \"giraffe\",\n    \"26\": \"N/A\",\n    \"27\": \"backpack\",\n    \"28\": \"umbrella\",\n    \"29\": \"N/A\",\n    \"30\": \"N/A\",\n    \"31\": \"handbag\",\n    \"32\": \"tie\",\n    \"33\": \"suitcase\",\n    \"34\": \"frisbee\",\n    \"35\": \"skis\",\n    \"36\": \"snowboard\",\n    \"37\": \"sports ball\",\n    \"38\": \"kite\",\n    \"39\": \"baseball bat\",\n    \"40\": \"baseball glove\",\n    \"41\": \"skateboard\",\n    \"42\": \"surfboard\",\n    \"43\": \"tennis racket\",\n    \"44\": \"bottle\",\n    \"45\": \"N/A\",\n    \"46\": \"wine glass\",\n    \"47\": \"cup\",\n    \"48\": \"fork\",\n    \"49\": \"knife\",\n    \"50\": \"spoon\",\n    \"51\": \"bowl\",\n    \"52\": \"banana\",\n    \"53\": \"apple\",\n    \"54\": \"sandwich\",\n    \"55\": \"orange\",\n    \"56\": \"broccoli\",\n    \"57\": \"carrot\",\n    \"58\": \"hot dog\",\n    \"59\": \"pizza\",\n    \"60\": \"donut\",\n    \"61\": \"cake\",\n    \"62\": \"chair\",\n    \"63\": \"couch\",\n    \"64\": \"potted plant\",\n    \"65\": \"bed\",\n    \"66\": \"N/A\",\n    \"67\": \"dining table\",\n    \"68\": \"N/A\",\n    \"69\": \"N/A\",\n    \"70\": \"toilet\",\n    \"71\": \"N/A\",\n    \"72\": \"tv\",\n    \"73\": \"laptop\",\n    \"74\": \"mouse\",\n    \"75\": \"remote\",\n    \"76\": \"keyboard\",\n    \"77\": \"cell phone\",\n    \"78\": \"microwave\",\n    \"79\": 
\"oven\",\n    \"80\": \"toaster\",\n    \"81\": \"sink\",\n    \"82\": \"refrigerator\",\n    \"83\": \"N/A\",\n    \"84\": \"book\",\n    \"85\": \"clock\",\n    \"86\": \"vase\",\n    \"87\": \"scissors\",\n    \"88\": \"teddy bear\",\n    \"89\": \"hair drier\",\n    \"90\": \"toothbrush\"\n}"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/compute_receptive_field.py",
    "content": "# vgg16(D)\nmodel = [[3, 1],\n         [3, 1],\n         [2, 2],  # maxpool\n         [3, 1],\n         [3, 1],\n         [2, 2],  # maxpool\n         [3, 1],\n         [3, 1],\n         [3, 1],\n         [2, 2],  # maxpool\n         [3, 1],\n         [3, 1],\n         [3, 1],\n         [2, 2],  # maxpool\n         [3, 1],\n         [3, 1],\n         [3, 1]]\n\nfield = model[-1][0]\nfor kernel, stride in model[::-1]:\n    field = (field - 1) * stride + kernel\nprint(field)  # 228\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/my_dataset.py",
    "content": "import os\nimport json\n\nimport torch\nfrom PIL import Image\nimport torch.utils.data as data\nfrom pycocotools.coco import COCO\n\n\ndef _coco_remove_images_without_annotations(dataset, ids):\n    \"\"\"\n    删除coco数据集中没有目标，或者目标面积非常小的数据\n    refer to:\n    https://github.com/pytorch/vision/blob/master/references/detection/coco_utils.py\n    :param dataset:\n    :param cat_list:\n    :return:\n    \"\"\"\n    def _has_only_empty_bbox(anno):\n        return all(any(o <= 1 for o in obj[\"bbox\"][2:]) for obj in anno)\n\n    def _has_valid_annotation(anno):\n        # if it's empty, there is no annotation\n        if len(anno) == 0:\n            return False\n        # if all boxes have close to zero area, there is no annotation\n        if _has_only_empty_bbox(anno):\n            return False\n\n        return True\n\n    valid_ids = []\n    for ds_idx, img_id in enumerate(ids):\n        ann_ids = dataset.getAnnIds(imgIds=img_id, iscrowd=None)\n        anno = dataset.loadAnns(ann_ids)\n\n        if _has_valid_annotation(anno):\n            valid_ids.append(img_id)\n\n    return valid_ids\n\n\nclass CocoDetection(data.Dataset):\n    \"\"\"`MS Coco Detection <https://cocodataset.org/>`_ Dataset.\n\n    Args:\n        root (string): Root directory where images are downloaded to.\n        annFile (string): Path to json annotation file.\n        transforms (callable, optional): A function/transform that takes input sample and its target as entry\n            and returns a transformed version.\n    \"\"\"\n\n    def __init__(self, root, dataset=\"train\", transforms=None):\n        super(CocoDetection, self).__init__()\n        assert dataset in [\"train\", \"val\"], 'dataset must be in [\"train\", \"val\"]'\n        anno_file = \"instances_{}2017.json\".format(dataset)\n        assert os.path.exists(root), \"file '{}' does not exist.\".format(root)\n        self.img_root = os.path.join(root, \"{}2017\".format(dataset))\n        assert 
os.path.exists(self.img_root), \"path '{}' does not exist.\".format(self.img_root)\n        self.anno_path = os.path.join(root, \"annotations\", anno_file)\n        assert os.path.exists(self.anno_path), \"file '{}' does not exist.\".format(self.anno_path)\n\n        self.mode = dataset\n        self.transforms = transforms\n        self.coco = COCO(self.anno_path)\n\n        # 获取coco数据索引与类别名称的关系\n        # 注意在object80中的索引并不是连续的，虽然只有80个类别，但索引还是按照stuff91来排序的\n        data_classes = dict([(v[\"id\"], v[\"name\"]) for k, v in self.coco.cats.items()])\n        max_index = max(data_classes.keys())  # 90\n        # 将缺失的类别名称设置成N/A\n        coco_classes = {}\n        for k in range(1, max_index + 1):\n            if k in data_classes:\n                coco_classes[k] = data_classes[k]\n            else:\n                coco_classes[k] = \"N/A\"\n\n        if dataset == \"train\":\n            json_str = json.dumps(coco_classes, indent=4)\n            with open(\"coco91_indices.json\", \"w\") as f:\n                f.write(json_str)\n\n        self.coco_classes = coco_classes\n\n        ids = list(sorted(self.coco.imgs.keys()))\n        if dataset == \"train\":\n            # 移除没有目标，或者目标面积非常小的数据\n            valid_ids = _coco_remove_images_without_annotations(self.coco, ids)\n            self.ids = valid_ids\n        else:\n            self.ids = ids\n\n    def parse_targets(self,\n                      img_id: int,\n                      coco_targets: list,\n                      w: int = None,\n                      h: int = None):\n        assert w > 0\n        assert h > 0\n\n        # 只筛选出单个对象的情况\n        anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]\n\n        boxes = [obj[\"bbox\"] for obj in anno]\n\n        # guard against no boxes via resizing\n        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)\n        # [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax]\n        boxes[:, 2:] += boxes[:, :2]\n        boxes[:, 0::2].clamp_(min=0, 
max=w)\n        boxes[:, 1::2].clamp_(min=0, max=h)\n\n        classes = [obj[\"category_id\"] for obj in anno]\n        classes = torch.tensor(classes, dtype=torch.int64)\n\n        area = torch.tensor([obj[\"area\"] for obj in anno])\n        iscrowd = torch.tensor([obj[\"iscrowd\"] for obj in anno])\n\n        # 筛选出合法的目标，即x_max>x_min且y_max>y_min\n        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])\n        boxes = boxes[keep]\n        classes = classes[keep]\n        area = area[keep]\n        iscrowd = iscrowd[keep]\n\n        target = {}\n        target[\"boxes\"] = boxes\n        target[\"labels\"] = classes\n        target[\"image_id\"] = torch.tensor([img_id])\n\n        # for conversion to coco api\n        target[\"area\"] = area\n        target[\"iscrowd\"] = iscrowd\n\n        return target\n\n    def __getitem__(self, index):\n        \"\"\"\n        Args:\n            index (int): Index\n\n        Returns:\n            tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.\n        \"\"\"\n        coco = self.coco\n        img_id = self.ids[index]\n        ann_ids = coco.getAnnIds(imgIds=img_id)\n        coco_target = coco.loadAnns(ann_ids)\n\n        path = coco.loadImgs(img_id)[0]['file_name']\n        img = Image.open(os.path.join(self.img_root, path)).convert('RGB')\n\n        w, h = img.size\n        target = self.parse_targets(img_id, coco_target, w, h)\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.ids)\n\n    def get_height_and_width(self, index):\n        coco = self.coco\n        img_id = self.ids[index]\n\n        img_info = coco.loadImgs(img_id)[0]\n        w = img_info[\"width\"]\n        h = img_info[\"height\"]\n        return h, w\n\n    @staticmethod\n    def collate_fn(batch):\n        return tuple(zip(*batch))\n\n\n# train = 
CocoDetection(\"/data/coco_data/\", dataset=\"train\")\n# print(len(train))\n# t = train[0]\n# print(t)"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/__init__.py",
    "content": "from .faster_rcnn_framework import FasterRCNN, FastRCNNPredictor\nfrom .rpn_function import AnchorsGenerator\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/boxes.py",
    "content": "import torch\nfrom typing import Tuple\nfrom torch import Tensor\nimport torchvision\n\n\ndef nms(boxes, scores, iou_threshold):\n    # type: (Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression (NMS) on the boxes according\n    to their intersection-over-union (IoU).\n\n    NMS iteratively removes lower scoring boxes which have an\n    IoU greater than iou_threshold with another (higher scoring)\n    box.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4])\n        boxes to perform NMS on. They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    iou_threshold : float\n        discards all overlapping\n        boxes with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices\n        of the elements that have been kept\n        by NMS, sorted in decreasing order of scores\n    \"\"\"\n    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n\n\ndef batched_nms(boxes, scores, idxs, iou_threshold):\n    # type: (Tensor, Tensor, Tensor, float) -> Tensor\n    \"\"\"\n    Performs non-maximum suppression in a batched fashion.\n\n    Each index value correspond to a category, and NMS\n    will not be applied between elements of different categories.\n\n    Parameters\n    ----------\n    boxes : Tensor[N, 4]\n        boxes where NMS will be performed. 
They\n        are expected to be in (x1, y1, x2, y2) format\n    scores : Tensor[N]\n        scores for each one of the boxes\n    idxs : Tensor[N]\n        indices of the categories for each one of the boxes.\n    iou_threshold : float\n        discards all overlapping boxes\n        with IoU > iou_threshold\n\n    Returns\n    -------\n    keep : Tensor\n        int64 tensor with the indices of\n        the elements that have been kept by NMS, sorted\n        in decreasing order of scores\n    \"\"\"\n    if boxes.numel() == 0:\n        return torch.empty((0,), dtype=torch.int64, device=boxes.device)\n\n    # strategy: in order to perform NMS independently per class.\n    # we add an offset to all the boxes. The offset is dependent\n    # only on the class idx, and is large enough so that boxes\n    # from different classes do not overlap\n    # 获取所有boxes中最大的坐标值（xmin, ymin, xmax, ymax）\n    max_coordinate = boxes.max()\n\n    # to(): Performs Tensor dtype and/or device conversion\n    # 为每一个类别/每一层生成一个很大的偏移量\n    # 这里的to只是让生成tensor的dtype和device与boxes保持一致\n    offsets = idxs.to(boxes) * (max_coordinate + 1)\n    # boxes加上对应层的偏移量后，保证不同类别/层之间boxes不会有重合的现象\n    boxes_for_nms = boxes + offsets[:, None]\n    keep = nms(boxes_for_nms, scores, iou_threshold)\n    return keep\n\n\ndef remove_small_boxes(boxes, min_size):\n    # type: (Tensor, float) -> Tensor\n    \"\"\"\n    Remove boxes which contain at least one side smaller than min_size.\n    移除宽高小于指定阈值的索引\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        min_size (float): minimum size\n\n    Returns:\n        keep (Tensor[K]): indices of the boxes that have both sides\n            larger than or equal to min_size\n    \"\"\"\n    ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # 预测boxes的宽和高\n    # keep = (ws >= min_size) & (hs >= min_size)  # 当满足宽，高都大于给定阈值时为True\n    keep = torch.logical_and(torch.ge(ws, min_size), torch.ge(hs, min_size))\n    # nonzero(): Returns a tensor 
containing the indices of all non-zero elements of input\n    # keep = keep.nonzero().squeeze(1)\n    keep = torch.where(keep)[0]\n    return keep\n\n\ndef clip_boxes_to_image(boxes, size):\n    # type: (Tensor, Tuple[int, int]) -> Tensor\n    \"\"\"\n    Clip boxes so that they lie inside an image of size `size`.\n    裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format\n        size (Tuple[height, width]): size of the image\n\n    Returns:\n        clipped_boxes (Tensor[N, 4])\n    \"\"\"\n    dim = boxes.dim()\n    boxes_x = boxes[..., 0::2]  # x1, x2\n    boxes_y = boxes[..., 1::2]  # y1, y2\n    height, width = size\n\n    if torchvision._is_tracing():\n        boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))\n        boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))\n    else:\n        boxes_x = boxes_x.clamp(min=0, max=width)   # 限制x坐标范围在[0,width]之间\n        boxes_y = boxes_y.clamp(min=0, max=height)  # 限制y坐标范围在[0,height]之间\n\n    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)\n    return clipped_boxes.reshape(boxes.shape)\n\n\ndef box_area(boxes):\n    \"\"\"\n    Computes the area of a set of bounding boxes, which are specified by its\n    (x1, y1, x2, y2) coordinates.\n\n    Arguments:\n        boxes (Tensor[N, 4]): boxes for which the area will be computed. 
They\n            are expected to be in (x1, y1, x2, y2) format\n\n    Returns:\n        area (Tensor[N]): area for each box\n    \"\"\"\n    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n\n\ndef box_iou(boxes1, boxes2):\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n\n    Arguments:\n        boxes1 (Tensor[N, 4])\n        boxes2 (Tensor[M, 4])\n\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n    area1 = box_area(boxes1)\n    area2 = box_area(boxes2)\n\n    #  When the shapes do not match,\n    #  the shape of the returned output tensor follows the broadcasting rules\n    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # left-top [N,M,2]\n    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # right-bottom [N,M,2]\n\n    wh = (rb - lt).clamp(min=0)  # [N,M,2]\n    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]\n\n    iou = inter / (area1[:, None] + area2 - inter)\n    return iou\n\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/det_utils.py",
    "content": "import torch\nimport math\nfrom typing import List, Tuple\nfrom torch import Tensor\n\n\nclass BalancedPositiveNegativeSampler(object):\n    \"\"\"\n    This class samples batches, ensuring that they contain a fixed proportion of positives\n    \"\"\"\n\n    def __init__(self, batch_size_per_image, positive_fraction):\n        # type: (int, float) -> None\n        \"\"\"\n        Arguments:\n            batch_size_per_image (int): number of elements to be selected per image\n            positive_fraction (float): percentage of positive elements per batch\n        \"\"\"\n        self.batch_size_per_image = batch_size_per_image\n        self.positive_fraction = positive_fraction\n\n    def __call__(self, matched_idxs):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        Arguments:\n            matched idxs: list of tensors containing -1, 0 or positive values.\n                Each tensor corresponds to a specific image.\n                -1 values are ignored, 0 are considered as negatives and > 0 as\n                positives.\n\n        Returns:\n            pos_idx (list[tensor])\n            neg_idx (list[tensor])\n\n        Returns two lists of binary masks for each image.\n        The first list contains the positive elements that were selected,\n        and the second list the negative example.\n        \"\"\"\n        pos_idx = []\n        neg_idx = []\n        # 遍历每张图像的matched_idxs\n        for matched_idxs_per_image in matched_idxs:\n            # >= 1的为正样本, nonzero返回非零元素索引\n            # positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)\n            positive = torch.where(torch.ge(matched_idxs_per_image, 1))[0]\n            # = 0的为负样本\n            # negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)\n            negative = torch.where(torch.eq(matched_idxs_per_image, 0))[0]\n\n            # 指定正样本的数量\n            num_pos = int(self.batch_size_per_image * 
self.positive_fraction)\n            # protect against not enough positive examples\n            # 如果正样本数量不够就直接采用所有正样本\n            num_pos = min(positive.numel(), num_pos)\n            # 指定负样本数量\n            num_neg = self.batch_size_per_image - num_pos\n            # protect against not enough negative examples\n            # 如果负样本数量不够就直接采用所有负样本\n            num_neg = min(negative.numel(), num_neg)\n\n            # randomly select positive and negative examples\n            # Returns a random permutation of integers from 0 to n - 1.\n            # 随机选择指定数量的正负样本\n            perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]\n            perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]\n\n            pos_idx_per_image = positive[perm1]\n            neg_idx_per_image = negative[perm2]\n\n            # create binary mask from indices\n            pos_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n            neg_idx_per_image_mask = torch.zeros_like(\n                matched_idxs_per_image, dtype=torch.uint8\n            )\n\n            pos_idx_per_image_mask[pos_idx_per_image] = 1\n            neg_idx_per_image_mask[neg_idx_per_image] = 1\n\n            pos_idx.append(pos_idx_per_image_mask)\n            neg_idx.append(neg_idx_per_image_mask)\n\n        return pos_idx, neg_idx\n\n\n@torch.jit._script_if_tracing\ndef encode_boxes(reference_boxes, proposals, weights):\n    # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor\n    \"\"\"\n    Encode a set of proposals with respect to some\n    reference boxes\n\n    Arguments:\n        reference_boxes (Tensor): reference boxes(gt)\n        proposals (Tensor): boxes to be encoded(anchors)\n        weights:\n    \"\"\"\n\n    # perform some unpacking to make it JIT-fusion friendly\n    wx = weights[0]\n    wy = weights[1]\n    ww = weights[2]\n    wh = weights[3]\n\n    # unsqueeze()\n    # 
Returns a new tensor with a dimension of size one inserted at the specified position.\n    proposals_x1 = proposals[:, 0].unsqueeze(1)\n    proposals_y1 = proposals[:, 1].unsqueeze(1)\n    proposals_x2 = proposals[:, 2].unsqueeze(1)\n    proposals_y2 = proposals[:, 3].unsqueeze(1)\n\n    reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)\n    reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)\n    reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)\n    reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)\n\n    # implementation starts here\n    # parse widths and heights\n    ex_widths = proposals_x2 - proposals_x1\n    ex_heights = proposals_y2 - proposals_y1\n    # parse coordinate of center point\n    ex_ctr_x = proposals_x1 + 0.5 * ex_widths\n    ex_ctr_y = proposals_y1 + 0.5 * ex_heights\n\n    gt_widths = reference_boxes_x2 - reference_boxes_x1\n    gt_heights = reference_boxes_y2 - reference_boxes_y1\n    gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths\n    gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights\n\n    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths\n    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights\n    targets_dw = ww * torch.log(gt_widths / ex_widths)\n    targets_dh = wh * torch.log(gt_heights / ex_heights)\n\n    targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)\n    return targets\n\n\nclass BoxCoder(object):\n    \"\"\"\n    This class encodes and decodes a set of bounding boxes into\n    the representation used for training the regressors.\n    \"\"\"\n\n    def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)):\n        # type: (Tuple[float, float, float, float], float) -> None\n        \"\"\"\n        Arguments:\n            weights (4-element tuple)\n            bbox_xform_clip (float)\n        \"\"\"\n        self.weights = weights\n        self.bbox_xform_clip = bbox_xform_clip\n\n    def encode(self, reference_boxes, proposals):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        结合anchors和与之对应的gt计算regression参数\n        Args:\n            reference_boxes: List[Tensor] 每个proposal/anchor对应的gt_boxes\n            proposals: List[Tensor] anchors/proposals\n\n        Returns: regression parameters\n\n        \"\"\"\n        # 统计每张图像的anchors个数，方便后面拼接在一起处理后在分开\n        # reference_boxes和proposal数据结构相同\n        boxes_per_image = [len(b) for b in reference_boxes]\n        reference_boxes = torch.cat(reference_boxes, dim=0)\n        proposals = torch.cat(proposals, dim=0)\n\n        # targets_dx, targets_dy, targets_dw, targets_dh\n        targets = self.encode_single(reference_boxes, proposals)\n        return targets.split(boxes_per_image, 0)\n\n    def encode_single(self, reference_boxes, proposals):\n        \"\"\"\n        Encode a set of proposals with respect to some\n        reference boxes\n\n        Arguments:\n            reference_boxes (Tensor): reference boxes\n            proposals (Tensor): boxes to be encoded\n        \"\"\"\n        dtype = reference_boxes.dtype\n        device = reference_boxes.device\n        weights = torch.as_tensor(self.weights, dtype=dtype, device=device)\n        targets = encode_boxes(reference_boxes, proposals, weights)\n\n        return targets\n\n    def decode(self, rel_codes, boxes):\n        # type: (Tensor, List[Tensor]) -> Tensor\n        \"\"\"\n\n        Args:\n            rel_codes: bbox regression parameters\n            boxes: anchors/proposals\n\n        Returns:\n\n        \"\"\"\n        assert isinstance(boxes, (list, tuple))\n        assert isinstance(rel_codes, 
torch.Tensor)\n        boxes_per_image = [b.size(0) for b in boxes]\n        concat_boxes = torch.cat(boxes, dim=0)\n\n        box_sum = 0\n        for val in boxes_per_image:\n            box_sum += val\n\n        # 将预测的bbox回归参数应用到对应anchors上得到预测bbox的坐标\n        pred_boxes = self.decode_single(\n            rel_codes, concat_boxes\n        )\n\n        # 防止pred_boxes为空时导致reshape报错\n        if box_sum > 0:\n            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)\n\n        return pred_boxes\n\n    def decode_single(self, rel_codes, boxes):\n        \"\"\"\n        From a set of original boxes and encoded relative box offsets,\n        get the decoded boxes.\n\n        Arguments:\n            rel_codes (Tensor): encoded boxes (bbox regression parameters)\n            boxes (Tensor): reference boxes (anchors/proposals)\n        \"\"\"\n        boxes = boxes.to(rel_codes.dtype)\n\n        # xmin, ymin, xmax, ymax\n        widths = boxes[:, 2] - boxes[:, 0]   # anchor/proposal宽度\n        heights = boxes[:, 3] - boxes[:, 1]  # anchor/proposal高度\n        ctr_x = boxes[:, 0] + 0.5 * widths   # anchor/proposal中心x坐标\n        ctr_y = boxes[:, 1] + 0.5 * heights  # anchor/proposal中心y坐标\n\n        wx, wy, ww, wh = self.weights  # RPN中为[1,1,1,1], fastrcnn中为[10,10,5,5]\n        dx = rel_codes[:, 0::4] / wx   # 预测anchors/proposals的中心坐标x回归参数\n        dy = rel_codes[:, 1::4] / wy   # 预测anchors/proposals的中心坐标y回归参数\n        dw = rel_codes[:, 2::4] / ww   # 预测anchors/proposals的宽度回归参数\n        dh = rel_codes[:, 3::4] / wh   # 预测anchors/proposals的高度回归参数\n\n        # limit max value, prevent sending too large values into torch.exp()\n        # self.bbox_xform_clip=math.log(1000. 
/ 16)   4.135\n        dw = torch.clamp(dw, max=self.bbox_xform_clip)\n        dh = torch.clamp(dh, max=self.bbox_xform_clip)\n\n        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]\n        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]\n        pred_w = torch.exp(dw) * widths[:, None]\n        pred_h = torch.exp(dh) * heights[:, None]\n\n        # xmin\n        pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymin\n        pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n        # xmax\n        pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w\n        # ymax\n        pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h\n\n        pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)\n        return pred_boxes\n\n\nclass Matcher(object):\n    BELOW_LOW_THRESHOLD = -1\n    BETWEEN_THRESHOLDS = -2\n\n    __annotations__ = {\n        'BELOW_LOW_THRESHOLD': int,\n        'BETWEEN_THRESHOLDS': int,\n    }\n\n    def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):\n        # type: (float, float, bool) -> None\n        \"\"\"\n        Args:\n            high_threshold (float): quality values greater than or equal to\n                this value are candidate matches.\n            low_threshold (float): a lower quality threshold used to stratify\n                matches into three levels:\n                1) matches >= high_threshold\n                2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)\n                3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)\n            allow_low_quality_matches (bool): if True, produce additional matches\n                for predictions that have only low-quality match candidates. 
See\n                set_low_quality_matches_ for more details.\n        \"\"\"\n        self.BELOW_LOW_THRESHOLD = -1\n        self.BETWEEN_THRESHOLDS = -2\n        assert low_threshold <= high_threshold\n        self.high_threshold = high_threshold  # 0.7\n        self.low_threshold = low_threshold    # 0.3\n        self.allow_low_quality_matches = allow_low_quality_matches\n\n    def __call__(self, match_quality_matrix):\n        \"\"\"\n        计算anchors与每个gtboxes匹配的iou最大值，并记录索引，\n        iou<low_threshold索引值为-1， low_threshold<=iou<high_threshold索引值为-2\n        Args:\n            match_quality_matrix (Tensor[float]): an MxN tensor, containing the\n            pairwise quality between M ground-truth elements and N predicted elements.\n\n        Returns:\n            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in\n            [0, M - 1] or a negative value indicating that prediction i could not\n            be matched.\n        \"\"\"\n        if match_quality_matrix.numel() == 0:\n            # empty targets or proposals not supported during training\n            if match_quality_matrix.shape[0] == 0:\n                raise ValueError(\n                    \"No ground-truth boxes available for one of the images \"\n                    \"during training\")\n            else:\n                raise ValueError(\n                    \"No proposal boxes available for one of the images \"\n                    \"during training\")\n\n        # match_quality_matrix is M (gt) x N (predicted)\n        # Max over gt elements (dim 0) to find best gt candidate for each prediction\n        # M x N 的每一列代表一个anchors与所有gt的匹配iou值\n        # matched_vals代表每列的最大值，即每个anchors与所有gt匹配的最大iou值\n        # matches对应最大值所在的索引\n        matched_vals, matches = match_quality_matrix.max(dim=0)  # the dimension to reduce.\n        if self.allow_low_quality_matches:\n            all_matches = matches.clone()\n        else:\n            all_matches = None\n\n        # Assign 
candidate matches with low quality to negative (unassigned) values\n        # 计算iou小于low_threshold的索引\n        below_low_threshold = matched_vals < self.low_threshold\n        # 计算iou在low_threshold与high_threshold之间的索引值\n        between_thresholds = (matched_vals >= self.low_threshold) & (\n            matched_vals < self.high_threshold\n        )\n        # iou小于low_threshold的matches索引置为-1\n        matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD  # -1\n\n        # iou在[low_threshold, high_threshold]之间的matches索引置为-2\n        matches[between_thresholds] = self.BETWEEN_THRESHOLDS    # -2\n\n        if self.allow_low_quality_matches:\n            assert all_matches is not None\n            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)\n\n        return matches\n\n    def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):\n        \"\"\"\n        Produce additional matches for predictions that have only low-quality matches.\n        Specifically, for each ground-truth find the set of predictions that have\n        maximum overlap with it (including ties); for each prediction in that set, if\n        it is unmatched, then match it to the ground-truth with which it has the highest\n        quality value.\n        \"\"\"\n        # For each gt, find the prediction with which it has highest quality\n        # 对于每个gt boxes寻找与其iou最大的anchor，\n        # highest_quality_foreach_gt为匹配到的最大iou值\n        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)  # the dimension to reduce.\n\n        # Find highest quality match available, even if it is low, including ties\n        # 寻找每个gt boxes与其iou最大的anchor索引，一个gt匹配到的最大iou可能有多个anchor\n        # gt_pred_pairs_of_highest_quality = torch.nonzero(\n        #     match_quality_matrix == highest_quality_foreach_gt[:, None]\n        # )\n        gt_pred_pairs_of_highest_quality = torch.where(\n            torch.eq(match_quality_matrix, highest_quality_foreach_gt[:, 
None])\n        )\n        # Example gt_pred_pairs_of_highest_quality:\n        #   tensor([[    0, 39796],\n        #           [    1, 32055],\n        #           [    1, 32070],\n        #           [    2, 39190],\n        #           [    2, 40255],\n        #           [    3, 40390],\n        #           [    3, 41455],\n        #           [    4, 45470],\n        #           [    5, 45325],\n        #           [    5, 46390]])\n        # Each row is a (gt index, prediction index)\n        # Note how gt items 1, 2, 3, and 5 each have two ties\n\n        # gt_pred_pairs_of_highest_quality[:, 0]代表是对应的gt index(不需要)\n        # pre_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]\n        pre_inds_to_update = gt_pred_pairs_of_highest_quality[1]\n        # 保留该anchor匹配gt最大iou的索引，即使iou低于设定的阈值\n        matches[pre_inds_to_update] = all_matches[pre_inds_to_update]\n\n\ndef smooth_l1_loss(input, target, beta: float = 1. / 9, size_average: bool = True):\n    \"\"\"\n    very similar to the smooth_l1_loss from pytorch, but with\n    the extra beta parameter\n    \"\"\"\n    n = torch.abs(input - target)\n    # cond = n < beta\n    cond = torch.lt(n, beta)\n    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)\n    if size_average:\n        return loss.mean()\n    return loss.sum()\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/faster_rcnn_framework.py",
    "content": "import warnings\nfrom collections import OrderedDict\nfrom typing import Tuple, List, Dict, Optional, Union\n\nimport torch\nfrom torch import nn, Tensor\nimport torch.nn.functional as F\nfrom torchvision.ops import MultiScaleRoIAlign\n\nfrom .roi_head import RoIHeads\nfrom .transform import GeneralizedRCNNTransform\nfrom .rpn_function import AnchorsGenerator, RPNHead, RegionProposalNetwork\n\n\nclass FasterRCNNBase(nn.Module):\n    \"\"\"\n    Main class for Generalized R-CNN.\n\n    Arguments:\n        backbone (nn.Module):\n        rpn (nn.Module):\n        roi_heads (nn.Module): takes the features + the proposals from the RPN and computes\n            detections / masks from it.\n        transform (nn.Module): performs the data transformation from the inputs to feed into\n            the model\n    \"\"\"\n\n    def __init__(self, backbone, rpn, roi_heads, transform):\n        super(FasterRCNNBase, self).__init__()\n        self.transform = transform\n        self.backbone = backbone\n        self.rpn = rpn\n        self.roi_heads = roi_heads\n        # used only on torchscript mode\n        self._has_warned = False\n\n    @torch.jit.unused\n    def eager_outputs(self, losses, detections):\n        # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Union[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        if self.training:\n            return losses\n\n        return detections\n\n    def forward(self, images, targets=None):\n        # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]\n        \"\"\"\n        Arguments:\n            images (list[Tensor]): images to be processed\n            targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)\n\n        Returns:\n            result (list[BoxList] or dict[Tensor]): the output from the model.\n                During training, it returns a dict[Tensor] which contains the losses.\n                During 
testing, it returns list[BoxList] contains additional fields\n                like `scores`, `labels` and `mask` (for Mask R-CNN models).\n\n        \"\"\"\n        if self.training and targets is None:\n            raise ValueError(\"In training mode, targets should be passed\")\n\n        if self.training:\n            assert targets is not None\n            for target in targets:         # 进一步判断传入的target的boxes参数是否符合规定\n                boxes = target[\"boxes\"]\n                if isinstance(boxes, torch.Tensor):\n                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:\n                        raise ValueError(\"Expected target boxes to be a tensor\"\n                                         \"of shape [N, 4], got {:}.\".format(\n                                          boxes.shape))\n                else:\n                    raise ValueError(\"Expected target boxes to be of type \"\n                                     \"Tensor, got {:}.\".format(type(boxes)))\n\n        original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])\n        for img in images:\n            val = img.shape[-2:]\n            assert len(val) == 2  # 防止输入的是个一维向量\n            original_image_sizes.append((val[0], val[1]))\n        # original_image_sizes = [img.shape[-2:] for img in images]\n\n        images, targets = self.transform(images, targets)  # 对图像进行预处理\n        # print(images.tensors.shape)\n        features = self.backbone(images.tensors)  # 将图像输入backbone得到特征图\n        if isinstance(features, torch.Tensor):  # 若只在一层特征层上预测，将feature放入有序字典中，并编号为‘0’\n            features = OrderedDict([('0', features)])  # 若在多层特征层上预测，传入的就是一个有序字典\n\n        # 将特征层以及标注target信息传入rpn中\n        # proposals: List[Tensor], Tensor_shape: [num_proposals, 4],\n        # 每个proposals是绝对坐标，且为(x1, y1, x2, y2)格式\n        proposals, proposal_losses = self.rpn(images, features, targets)\n\n        # 将rpn生成的数据以及标注target信息传入fast rcnn后半部分\n        detections, detector_losses = 
self.roi_heads(features, proposals, images.image_sizes, targets)\n\n        # 对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)\n\n        losses = {}\n        losses.update(detector_losses)\n        losses.update(proposal_losses)\n\n        if torch.jit.is_scripting():\n            if not self._has_warned:\n                warnings.warn(\"RCNN always returns a (Losses, Detections) tuple in scripting\")\n                self._has_warned = True\n            return losses, detections\n        else:\n            return self.eager_outputs(losses, detections)\n\n        # if self.training:\n        #     return losses\n        #\n        # return detections\n\n\nclass TwoMLPHead(nn.Module):\n    \"\"\"\n    Standard heads for FPN-based models\n\n    Arguments:\n        in_channels (int): number of input channels\n        representation_size (int): size of the intermediate representation\n    \"\"\"\n\n    def __init__(self, in_channels, representation_size):\n        super(TwoMLPHead, self).__init__()\n\n        self.fc6 = nn.Linear(in_channels, representation_size)\n        self.fc7 = nn.Linear(representation_size, representation_size)\n\n    def forward(self, x):\n        x = x.flatten(start_dim=1)\n\n        x = F.relu(self.fc6(x))\n        x = F.relu(self.fc7(x))\n\n        return x\n\n\nclass FastRCNNPredictor(nn.Module):\n    \"\"\"\n    Standard classification + bounding box regression layers\n    for Fast R-CNN.\n\n    Arguments:\n        in_channels (int): number of input channels\n        num_classes (int): number of output classes (including background)\n    \"\"\"\n\n    def __init__(self, in_channels, num_classes):\n        super(FastRCNNPredictor, self).__init__()\n        self.cls_score = nn.Linear(in_channels, num_classes)\n        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)\n\n    def forward(self, x):\n        if x.dim() == 4:\n            assert 
list(x.shape[2:]) == [1, 1]\n        x = x.flatten(start_dim=1)\n        scores = self.cls_score(x)\n        bbox_deltas = self.bbox_pred(x)\n\n        return scores, bbox_deltas\n\n\nclass FasterRCNN(FasterRCNNBase):\n    \"\"\"\n    Implements Faster R-CNN.\n\n    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each\n    image, and should be in 0-1 range. Different images can have different sizes.\n\n    The behavior of the model changes depending if it is in training or evaluation mode.\n\n    During training, the model expects both the input tensors, as well as a targets (list of dictionary),\n    containing:\n        - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values\n          between 0 and H and 0 and W\n        - labels (Int64Tensor[N]): the class label for each ground-truth box\n\n    The model returns a Dict[Tensor] during training, containing the classification and regression\n    losses for both the RPN and the R-CNN.\n\n    During inference, the model requires only the input tensors, and returns the post-processed\n    predictions as a List[Dict[Tensor]], one for each input image. 
The fields of the Dict are as\n    follows:\n        - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values between\n          0 and H and 0 and W\n        - labels (Int64Tensor[N]): the predicted labels for each image\n        - scores (Tensor[N]): the scores or each prediction\n\n    Arguments:\n        backbone (nn.Module): the network used to compute the features for the model.\n            It should contain a out_channels attribute, which indicates the number of output\n            channels that each feature map has (and it should be the same for all feature maps).\n            The backbone should return a single Tensor or and OrderedDict[Tensor].\n        num_classes (int): number of output classes of the model (including the background).\n            If box_predictor is specified, num_classes should be None.\n        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone\n        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone\n        image_mean (Tuple[float, float, float]): mean values used for input normalization.\n            They are generally the mean values of the dataset on which the backbone has been trained\n            on\n        image_std (Tuple[float, float, float]): std values used for input normalization.\n            They are generally the std values of the dataset on which the backbone has been trained on\n        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN\n        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training\n        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing\n        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during 
training\n        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing\n        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        rpn_score_thresh (float): during inference, only return proposals with a classification score\n            greater than rpn_score_thresh\n        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in\n            the locations indicated by the bounding boxes\n        box_head (nn.Module): module that takes the cropped feature maps as input\n        box_predictor (nn.Module): module that takes the output of box_head and returns the\n            classification logits and box regression deltas.\n        box_score_thresh (float): during inference, only return proposals with a classification score\n            greater than box_score_thresh\n        box_nms_thresh (float): NMS threshold for the prediction head. 
Used during inference\n        box_detections_per_img (int): maximum number of detections per image, for all classes.\n        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be\n            considered as positive during training of the classification head\n        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be\n            considered as negative during training of the classification head\n        box_batch_size_per_image (int): number of proposals that are sampled during training of the\n            classification head\n        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training\n            of the classification head\n        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the\n            bounding boxes\n\n    \"\"\"\n\n    def __init__(self, backbone, num_classes=None,\n                 # transform parameter\n                 min_size=800, max_size=1333,      # 预处理resize时限制的最小尺寸与最大尺寸\n                 image_mean=None, image_std=None,  # 预处理normalize时使用的均值和方差\n                 # RPN parameters\n                 rpn_anchor_generator=None, rpn_head=None,\n                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # rpn中在nms处理前保留的proposal数(根据score)\n                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # rpn中在nms处理后保留的proposal数\n                 rpn_nms_thresh=0.7,  # rpn中进行nms处理时使用的iou阈值\n                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # rpn计算损失时，采集正负样本设置的阈值\n                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # rpn计算损失时采样的样本数，以及正样本占总样本的比例\n                 rpn_score_thresh=0.0,\n                 # Box parameters\n                 box_roi_pool=None, box_head=None, box_predictor=None,\n                 # 移除低目标概率      fast rcnn中进行nms处理的阈值   对预测结果根据score排序取前100个目标\n                 box_score_thresh=0.05, 
box_nms_thresh=0.5, box_detections_per_img=100,\n                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # fast rcnn计算误差时，采集正负样本设置的阈值\n                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # fast rcnn计算误差时采样的样本数，以及正样本占所有样本的比例\n                 bbox_reg_weights=None):\n        if not hasattr(backbone, \"out_channels\"):\n            raise ValueError(\n                \"backbone should contain an attribute out_channels\"\n                \"specifying the number of output channels  (assumed to be the\"\n                \"same for all the levels\"\n            )\n\n        assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))\n        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))\n\n        if num_classes is not None:\n            if box_predictor is not None:\n                raise ValueError(\"num_classes should be None when box_predictor \"\n                                 \"is specified\")\n        else:\n            if box_predictor is None:\n                raise ValueError(\"num_classes should not be None when box_predictor \"\n                                 \"is not specified\")\n\n        # 预测特征层的channels\n        out_channels = backbone.out_channels\n\n        # 若anchor生成器为空，则自动生成针对resnet50_fpn的anchor生成器\n        if rpn_anchor_generator is None:\n            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))\n            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)\n            rpn_anchor_generator = AnchorsGenerator(\n                anchor_sizes, aspect_ratios\n            )\n\n        # 生成RPN通过滑动窗口预测网络部分\n        if rpn_head is None:\n            rpn_head = RPNHead(\n                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]\n            )\n\n        # 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,\n        # 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000,\n        rpn_pre_nms_top_n = 
dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)\n        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)\n\n        # 定义整个RPN框架\n        rpn = RegionProposalNetwork(\n            rpn_anchor_generator, rpn_head,\n            rpn_fg_iou_thresh, rpn_bg_iou_thresh,\n            rpn_batch_size_per_image, rpn_positive_fraction,\n            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,\n            score_thresh=rpn_score_thresh)\n\n        #  Multi-scale RoIAlign pooling\n        if box_roi_pool is None:\n            box_roi_pool = MultiScaleRoIAlign(\n                featmap_names=['0', '1', '2', '3'],  # 在哪些特征层进行roi pooling\n                output_size=[7, 7],\n                sampling_ratio=2)\n\n        # fast RCNN中roi pooling后的展平处理两个全连接层部分\n        if box_head is None:\n            resolution = box_roi_pool.output_size[0]  # 默认等于7\n            representation_size = 1024\n            box_head = TwoMLPHead(\n                out_channels * resolution ** 2,\n                representation_size\n            )\n\n        # 在box_head的输出上预测部分\n        if box_predictor is None:\n            representation_size = 1024\n            box_predictor = FastRCNNPredictor(\n                representation_size,\n                num_classes)\n\n        # 将roi pooling, box_head以及box_predictor结合在一起\n        roi_heads = RoIHeads(\n            # box\n            box_roi_pool, box_head, box_predictor,\n            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5\n            box_batch_size_per_image, box_positive_fraction,  # 512  0.25\n            bbox_reg_weights,\n            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100\n\n        if image_mean is None:\n            image_mean = [0.485, 0.456, 0.406]\n        if image_std is None:\n            image_std = [0.229, 0.224, 0.225]\n\n        # 对数据进行标准化，缩放，打包成batch等处理部分\n        transform = GeneralizedRCNNTransform(min_size, max_size, 
image_mean, image_std)\n\n        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/image_list.py",
    "content": "from typing import List, Tuple\nfrom torch import Tensor\n\n\nclass ImageList(object):\n    \"\"\"\n    Structure that holds a list of images (of possibly\n    varying sizes) as a single tensor.\n    This works by padding the images to the same size,\n    and storing in a field the original sizes of each image\n    \"\"\"\n\n    def __init__(self, tensors, image_sizes):\n        # type: (Tensor, List[Tuple[int, int]]) -> None\n        \"\"\"\n        Arguments:\n            tensors (tensor) padding后的图像数据\n            image_sizes (list[tuple[int, int]])  padding前的图像尺寸\n        \"\"\"\n        self.tensors = tensors\n        self.image_sizes = image_sizes\n\n    def to(self, device):\n        # type: (Device) -> ImageList # noqa\n        cast_tensor = self.tensors.to(device)\n        return ImageList(cast_tensor, self.image_sizes)\n\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/roi_head.py",
    "content": "from typing import Optional, List, Dict, Tuple\n\nimport torch\nfrom torch import Tensor\nimport torch.nn.functional as F\n\nfrom . import det_utils\nfrom . import boxes as box_ops\n\n\ndef fastrcnn_loss(class_logits, box_regression, labels, regression_targets):\n    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    Computes the loss for Faster R-CNN.\n\n    Arguments:\n        class_logits : 预测类别概率信息，shape=[num_anchors, num_classes]\n        box_regression : 预测边目标界框回归信息\n        labels : 真实类别信息\n        regression_targets : 真实目标边界框信息\n\n    Returns:\n        classification_loss (Tensor)\n        box_loss (Tensor)\n    \"\"\"\n\n    labels = torch.cat(labels, dim=0)\n    regression_targets = torch.cat(regression_targets, dim=0)\n\n    # 计算类别损失信息\n    classification_loss = F.cross_entropy(class_logits, labels)\n\n    # get indices that correspond to the regression targets for\n    # the corresponding ground truth labels, to be used with\n    # advanced indexing\n    # 返回标签类别大于0的索引\n    # sampled_pos_inds_subset = torch.nonzero(torch.gt(labels, 0)).squeeze(1)\n    sampled_pos_inds_subset = torch.where(torch.gt(labels, 0))[0]\n\n    # 返回标签类别大于0位置的类别信息\n    labels_pos = labels[sampled_pos_inds_subset]\n\n    # shape=[num_proposal, num_classes]\n    N, num_classes = class_logits.shape\n    box_regression = box_regression.reshape(N, -1, 4)\n\n    # 计算边界框损失信息\n    box_loss = det_utils.smooth_l1_loss(\n        # 获取指定索引proposal的指定类别box信息\n        box_regression[sampled_pos_inds_subset, labels_pos],\n        regression_targets[sampled_pos_inds_subset],\n        beta=1 / 9,\n        size_average=False,\n    ) / labels.numel()\n\n    return classification_loss, box_loss\n\n\nclass RoIHeads(torch.nn.Module):\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n    }\n\n    def 
__init__(self,\n                 box_roi_pool,   # Multi-scale RoIAlign pooling\n                 box_head,       # TwoMLPHead\n                 box_predictor,  # FastRCNNPredictor\n                 # Faster R-CNN training\n                 fg_iou_thresh, bg_iou_thresh,  # default: 0.5, 0.5\n                 batch_size_per_image, positive_fraction,  # default: 512, 0.25\n                 bbox_reg_weights,  # None\n                 # Faster R-CNN inference\n                 score_thresh,        # default: 0.05\n                 nms_thresh,          # default: 0.5\n                 detection_per_img):  # default: 100\n        super(RoIHeads, self).__init__()\n\n        self.box_similarity = box_ops.box_iou\n        # assign ground-truth boxes for each proposal\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # default: 0.5\n            bg_iou_thresh,  # default: 0.5\n            allow_low_quality_matches=False)\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image,  # default: 512\n            positive_fraction)     # default: 0.25\n\n        if bbox_reg_weights is None:\n            bbox_reg_weights = (10., 10., 5., 5.)\n        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)\n\n        self.box_roi_pool = box_roi_pool    # Multi-scale RoIAlign pooling\n        self.box_head = box_head            # TwoMLPHead\n        self.box_predictor = box_predictor  # FastRCNNPredictor\n\n        self.score_thresh = score_thresh  # default: 0.05\n        self.nms_thresh = nms_thresh      # default: 0.5\n        self.detection_per_img = detection_per_img  # default: 100\n\n    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):\n        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        为每个proposal匹配对应的gt_box，并划分到正负样本中\n        Args:\n            proposals:\n            gt_boxes:\n            gt_labels:\n\n   
     Returns:\n\n        \"\"\"\n        matched_idxs = []\n        labels = []\n        # 遍历每张图像的proposals, gt_boxes, gt_labels信息\n        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):\n            if gt_boxes_in_image.numel() == 0:  # 该张图像中没有gt框，为背景\n                # background image\n                device = proposals_in_image.device\n                clamped_matched_idxs_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n                labels_in_image = torch.zeros(\n                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device\n                )\n            else:\n                #  set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                # 计算proposal与每个gt_box的iou重合度\n                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)\n\n                # 计算proposal与每个gt_box匹配的iou最大值，并记录索引，\n                # iou < low_threshold索引值为 -1， low_threshold <= iou < high_threshold索引值为 -2\n                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)\n\n                # 限制最小值，防止匹配标签时出现越界的情况\n                # 注意-1, -2对应的gt索引会调整到0,获取的标签类别为第0个gt的类别（实际上并不是）,后续会进一步处理\n                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)\n                # 获取proposal匹配到的gt对应标签\n                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]\n                labels_in_image = labels_in_image.to(dtype=torch.int64)\n\n                # label background (below the low threshold)\n                # 将gt索引为-1的类别设置为0，即背景，负样本\n                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_in_image[bg_inds] = 0\n\n                # label ignore proposals (between low and high threshold)\n                # 将gt索引为-2的类别设置为-1, 即废弃样本\n            
    ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler\n\n            matched_idxs.append(clamped_matched_idxs_in_image)\n            labels.append(labels_in_image)\n        return matched_idxs, labels\n\n    def subsample(self, labels):\n        # type: (List[Tensor]) -> List[Tensor]\n        # BalancedPositiveNegativeSampler\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        sampled_inds = []\n        # 遍历每张图片的正负样本索引\n        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):\n            # 记录所有采集样本索引（包括正样本和负样本）\n            # img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)\n            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]\n            sampled_inds.append(img_sampled_inds)\n        return sampled_inds\n\n    def add_gt_proposals(self, proposals, gt_boxes):\n        # type: (List[Tensor], List[Tensor]) -> List[Tensor]\n        \"\"\"\n        将gt_boxes拼接到proposal后面\n        Args:\n            proposals: 一个batch中每张图像rpn预测的boxes\n            gt_boxes:  一个batch中每张图像对应的真实目标边界框\n\n        Returns:\n\n        \"\"\"\n        proposals = [\n            torch.cat((proposal, gt_box))\n            for proposal, gt_box in zip(proposals, gt_boxes)\n        ]\n        return proposals\n\n    def check_targets(self, targets):\n        # type: (Optional[List[Dict[str, Tensor]]]) -> None\n        assert targets is not None\n        assert all([\"boxes\" in t for t in targets])\n        assert all([\"labels\" in t for t in targets])\n\n    def select_training_samples(self,\n                                proposals,  # type: List[Tensor]\n                                targets     # type: Optional[List[Dict[str, Tensor]]]\n                                ):\n        # type: (...) 
-> Tuple[List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        划分正负样本，统计对应gt的标签以及边界框回归信息\n        list元素个数为batch_size\n        Args:\n            proposals: rpn预测的boxes\n            targets:\n\n        Returns:\n\n        \"\"\"\n\n        # 检查target数据是否为空\n        self.check_targets(targets)\n        # 如果不加这句，jit.script会不通过(看不懂)\n        assert targets is not None\n\n        dtype = proposals[0].dtype\n        device = proposals[0].device\n\n        # 获取标注好的boxes以及labels信息\n        gt_boxes = [t[\"boxes\"].to(dtype) for t in targets]\n        gt_labels = [t[\"labels\"] for t in targets]\n\n        # append ground-truth bboxes to proposal\n        # 将gt_boxes拼接到proposal后面\n        proposals = self.add_gt_proposals(proposals, gt_boxes)\n\n        # get matching gt indices for each proposal\n        # 为每个proposal匹配对应的gt_box，并划分到正负样本中\n        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)\n        # sample a fixed proportion of positive-negative proposals\n        # 按给定数量和比例采样正负样本\n        sampled_inds = self.subsample(labels)\n        matched_gt_boxes = []\n        num_images = len(proposals)\n\n        # 遍历每张图像\n        for img_id in range(num_images):\n            # 获取每张图像的正负样本索引\n            img_sampled_inds = sampled_inds[img_id]\n            # 获取对应正负样本的proposals信息\n            proposals[img_id] = proposals[img_id][img_sampled_inds]\n            # 获取对应正负样本的真实类别信息\n            labels[img_id] = labels[img_id][img_sampled_inds]\n            # 获取对应正负样本的gt索引信息\n            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]\n\n            gt_boxes_in_image = gt_boxes[img_id]\n            if gt_boxes_in_image.numel() == 0:\n                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)\n            # 获取对应正负样本的gt box信息\n            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])\n\n        # 根据gt和proposal计算边框回归参数（针对gt的）\n        regression_targets = 
self.box_coder.encode(matched_gt_boxes, proposals)\n        return proposals, labels, regression_targets\n\n    def postprocess_detections(self,\n                               class_logits,    # type: Tensor\n                               box_regression,  # type: Tensor\n                               proposals,       # type: List[Tensor]\n                               image_shapes     # type: List[Tuple[int, int]]\n                               ):\n        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]\n        \"\"\"\n        对网络的预测数据进行后处理，包括\n        （1）根据proposal以及预测的回归参数计算出最终bbox坐标\n        （2）对预测类别结果进行softmax处理\n        （3）裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n        （4）移除所有背景信息\n        （5）移除低概率目标\n        （6）移除小尺寸目标\n        （7）执行nms处理，并按scores进行排序\n        （8）根据scores排序返回前topk个目标\n        Args:\n            class_logits: 网络预测类别概率信息\n            box_regression: 网络预测的边界框回归参数\n            proposals: rpn输出的proposal\n            image_shapes: 打包成batch前每张图像的宽高\n\n        Returns:\n\n        \"\"\"\n        device = class_logits.device\n        # 预测目标类别数\n        num_classes = class_logits.shape[-1]\n\n        # 获取每张图像的预测bbox数量\n        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]\n        # 根据proposal以及预测的回归参数计算出最终bbox坐标\n        pred_boxes = self.box_coder.decode(box_regression, proposals)\n\n        # 对预测类别结果进行softmax处理\n        pred_scores = F.softmax(class_logits, -1)\n\n        # split boxes and scores per image\n        # 根据每张图像的预测bbox数量分割结果\n        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)\n        pred_scores_list = pred_scores.split(boxes_per_image, 0)\n\n        all_boxes = []\n        all_scores = []\n        all_labels = []\n        # 遍历每张图像预测信息\n        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):\n            # 裁剪预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)\n\n            # create labels for each 
prediction\n            labels = torch.arange(num_classes, device=device)\n            labels = labels.view(1, -1).expand_as(scores)\n\n            # remove prediction with the background label\n            # 移除索引为0的所有信息（0代表背景）\n            boxes = boxes[:, 1:]\n            scores = scores[:, 1:]\n            labels = labels[:, 1:]\n\n            # batch everything, by making every class prediction be a separate instance\n            boxes = boxes.reshape(-1, 4)\n            scores = scores.reshape(-1)\n            labels = labels.reshape(-1)\n\n            # remove low scoring boxes\n            # 移除低概率目标，self.score_thresh=0.05\n            # gt: Computes input > other element-wise.\n            # inds = torch.nonzero(torch.gt(scores, self.score_thresh)).squeeze(1)\n            inds = torch.where(torch.gt(scores, self.score_thresh))[0]\n            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]\n\n            # remove empty boxes\n            # 移除小目标\n            keep = box_ops.remove_small_boxes(boxes, min_size=1.)\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n            # non-maximum suppression, independently done per class\n            # 执行nms处理，执行后的结果会按照scores从大到小进行排序返回\n            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            # 获取scores排在前topk个预测目标\n            keep = keep[:self.detection_per_img]\n            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]\n\n            all_boxes.append(boxes)\n            all_scores.append(scores)\n            all_labels.append(labels)\n\n        return all_boxes, all_scores, all_labels\n\n    def forward(self,\n                features,       # type: Dict[str, Tensor]\n                proposals,      # type: List[Tensor]\n                image_shapes,   # type: List[Tuple[int, int]]\n                targets=None    # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            features (Dict[str, Tensor])\n            proposals (List[Tensor[N, 4]])\n            image_shapes (List[Tuple[H, W]])\n            targets (List[Dict])\n        \"\"\"\n\n        # 检查targets的数据类型是否正确\n        if targets is not None:\n            for t in targets:\n                floating_point_types = (torch.float, torch.double, torch.half)\n                assert t[\"boxes\"].dtype in floating_point_types, \"target boxes must of float type\"\n                assert t[\"labels\"].dtype == torch.int64, \"target labels must of int64 type\"\n\n        if self.training:\n            # 划分正负样本，统计对应gt的标签以及边界框回归信息\n            proposals, labels, regression_targets = self.select_training_samples(proposals, targets)\n        else:\n            labels = None\n            regression_targets = None\n\n        # 将采集样本通过Multi-scale RoIAlign pooling层\n        # box_features_shape: [num_proposals, channel, height, width]\n        box_features = self.box_roi_pool(features, proposals, image_shapes)\n\n        # 通过roi_pooling后的两层全连接层\n        # box_features_shape: [num_proposals, representation_size]\n        box_features = self.box_head(box_features)\n\n        # 接着分别预测目标类别和边界框回归参数\n        class_logits, box_regression = self.box_predictor(box_features)\n\n        result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])\n        losses = {}\n        if self.training:\n            assert labels is not None and regression_targets is not None\n            loss_classifier, loss_box_reg = fastrcnn_loss(\n                class_logits, box_regression, labels, regression_targets)\n            losses = {\n                \"loss_classifier\": loss_classifier,\n                \"loss_box_reg\": loss_box_reg\n            }\n        else:\n            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)\n            num_images = len(boxes)\n            for i in range(num_images):\n                result.append(\n                    {\n                        \"boxes\": boxes[i],\n                        \"labels\": labels[i],\n                        \"scores\": scores[i],\n                    }\n                )\n\n        return result, losses\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/rpn_function.py",
    "content": "from typing import List, Optional, Dict, Tuple\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nimport torchvision\n\nfrom . import det_utils\nfrom . import boxes as box_ops\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):\n    # type: (Tensor, int) -> Tuple[int, int]\n    from torch.onnx import operators\n    num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)\n    pre_nms_top_n = torch.min(torch.cat(\n        (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),\n         num_anchors), 0))\n\n    return num_anchors, pre_nms_top_n\n\n\nclass AnchorsGenerator(nn.Module):\n    __annotations__ = {\n        \"cell_anchors\": Optional[List[torch.Tensor]],\n        \"_cache\": Dict[str, List[torch.Tensor]]\n    }\n\n    \"\"\"\n    anchors生成器\n    Module that generates anchors for a set of feature maps and\n    image sizes.\n\n    The module support computing anchors at multiple sizes and aspect ratios\n    per feature map.\n\n    sizes and aspect_ratios should have the same number of elements, and it should\n    correspond to the number of feature maps.\n\n    sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,\n    and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors\n    per spatial location for feature map i.\n\n    Arguments:\n        sizes (Tuple[Tuple[int]]):\n        aspect_ratios (Tuple[Tuple[float]]):\n    \"\"\"\n\n    def __init__(self, sizes=(128, 256, 512), aspect_ratios=(0.5, 1.0, 2.0)):\n        super(AnchorsGenerator, self).__init__()\n\n        if not isinstance(sizes[0], (list, tuple)):\n            # TODO change this\n            sizes = tuple((s,) for s in sizes)\n        if not isinstance(aspect_ratios[0], (list, tuple)):\n            aspect_ratios = (aspect_ratios,) * len(sizes)\n\n        assert len(sizes) == len(aspect_ratios)\n\n        self.sizes = sizes\n 
       self.aspect_ratios = aspect_ratios\n        self.cell_anchors = None\n        self._cache = {}\n\n    def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device=torch.device(\"cpu\")):\n        # type: (List[int], List[float], torch.dtype, torch.device) -> Tensor\n        \"\"\"\n        compute anchor sizes\n        Arguments:\n            scales: sqrt(anchor_area)\n            aspect_ratios: h/w ratios\n            dtype: float32\n            device: cpu/gpu\n        \"\"\"\n        scales = torch.as_tensor(scales, dtype=dtype, device=device)\n        aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)\n        h_ratios = torch.sqrt(aspect_ratios)\n        w_ratios = 1.0 / h_ratios\n\n        # [r1, r2, r3]' * [s1, s2, s3]\n        # number of elements is len(ratios)*len(scales)\n        ws = (w_ratios[:, None] * scales[None, :]).view(-1)\n        hs = (h_ratios[:, None] * scales[None, :]).view(-1)\n\n        # left-top, right-bottom coordinate relative to anchor center(0, 0)\n        # 生成的anchors模板都是以（0, 0）为中心的, shape [len(ratios)*len(scales), 4]\n        base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2\n\n        return base_anchors.round()  # round 四舍五入\n\n    def set_cell_anchors(self, dtype, device):\n        # type: (torch.dtype, torch.device) -> None\n        if self.cell_anchors is not None:\n            cell_anchors = self.cell_anchors\n            assert cell_anchors is not None\n            # suppose that all anchors have the same device\n            # which is a valid assumption in the current state of the codebase\n            if cell_anchors[0].device == device:\n                return\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        # anchors模板都是以(0, 0)为中心的anchor\n        cell_anchors = [\n            self.generate_anchors(sizes, aspect_ratios, dtype, device)\n            for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)\n        ]\n        self.cell_anchors = 
cell_anchors\n\n    def num_anchors_per_location(self):\n        # 计算每个预测特征层上每个滑动窗口的预测目标数\n        return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]\n\n    # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),\n    # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.\n    def grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"\n        anchors position in grid coordinate axis map into origin image\n        计算预测特征图对应原始图像上的所有anchors的坐标\n        Args:\n            grid_sizes: 预测特征矩阵的height和width\n            strides: 预测特征矩阵上一步对应原始图像上的步距\n        \"\"\"\n        anchors = []\n        cell_anchors = self.cell_anchors\n        assert cell_anchors is not None\n\n        # 遍历每个预测特征层的grid_size，strides和cell_anchors\n        for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):\n            grid_height, grid_width = size\n            stride_height, stride_width = stride\n            device = base_anchors.device\n\n            # For output anchor, compute [x_center, y_center, x_center, y_center]\n            # shape: [grid_width] 对应原图上的x坐标(列)\n            shifts_x = torch.arange(0, grid_width, dtype=torch.float32, device=device) * stride_width\n            # shape: [grid_height] 对应原图上的y坐标(行)\n            shifts_y = torch.arange(0, grid_height, dtype=torch.float32, device=device) * stride_height\n\n            # 计算预测特征矩阵上每个点对应原图上的坐标(anchors模板的坐标偏移量)\n            # torch.meshgrid函数分别传入行坐标和列坐标，生成网格行坐标矩阵和网格列坐标矩阵\n            # shape: [grid_height, grid_width]\n            shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)\n            shift_x = shift_x.reshape(-1)\n            shift_y = shift_y.reshape(-1)\n\n            # 计算anchors坐标(xmin, ymin, xmax, ymax)在原图上的坐标偏移量\n            # shape: [grid_width*grid_height, 4]\n            shifts = torch.stack([shift_x, shift_y, 
shift_x, shift_y], dim=1)\n\n            # For every (base anchor, output anchor) pair,\n            # offset each zero-centered base anchor by the center of the output anchor.\n            # 将anchors模板与原图上的坐标偏移量相加得到原图上所有anchors的坐标信息(shape不同时会使用广播机制)\n            shifts_anchor = shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)\n            anchors.append(shifts_anchor.reshape(-1, 4))\n\n        return anchors  # List[Tensor(all_num_anchors, 4)]\n\n    def cached_grid_anchors(self, grid_sizes, strides):\n        # type: (List[List[int]], List[List[Tensor]]) -> List[Tensor]\n        \"\"\"将计算得到的所有anchors信息进行缓存\"\"\"\n        key = str(grid_sizes) + str(strides)\n        # self._cache是字典类型\n        if key in self._cache:\n            return self._cache[key]\n        anchors = self.grid_anchors(grid_sizes, strides)\n        self._cache[key] = anchors\n        return anchors\n\n    def forward(self, image_list, feature_maps):\n        # type: (ImageList, List[Tensor]) -> List[Tensor]\n        # 获取每个预测特征层的尺寸(height, width)\n        grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])\n\n        # 获取输入图像的height和width\n        image_size = image_list.tensors.shape[-2:]\n\n        # 获取变量类型和设备类型\n        dtype, device = feature_maps[0].dtype, feature_maps[0].device\n\n        # one step in feature map equate n pixel stride in origin image\n        # 计算特征层上的一步等于原始图像上的步长\n        strides = [[torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),\n                    torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device)] for g in grid_sizes]\n\n        # 根据提供的sizes和aspect_ratios生成anchors模板\n        self.set_cell_anchors(dtype, device)\n\n        # 计算/读取所有anchors的坐标信息（这里的anchors信息是映射到原图上的所有anchors信息，不是anchors模板）\n        # 得到的是一个list列表，对应每张预测特征图映射回原图的anchors坐标信息\n        anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)\n\n        anchors = torch.jit.annotate(List[List[torch.Tensor]], [])\n     
   # 遍历一个batch中的每张图像\n        for i, (image_height, image_width) in enumerate(image_list.image_sizes):\n            anchors_in_image = []\n            # 遍历每张预测特征图映射回原图的anchors坐标信息\n            for anchors_per_feature_map in anchors_over_all_feature_maps:\n                anchors_in_image.append(anchors_per_feature_map)\n            anchors.append(anchors_in_image)\n        # 将每一张图像的所有预测特征层的anchors坐标信息拼接在一起\n        # anchors是个list，每个元素为一张图像的所有anchors信息\n        anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]\n        # Clear the cache in case that memory leaks.\n        self._cache.clear()\n        return anchors\n\n\nclass RPNHead(nn.Module):\n    \"\"\"\n    add a RPN head with classification and regression\n    通过滑动窗口计算预测目标概率与bbox regression参数\n\n    Arguments:\n        in_channels: number of channels of the input feature\n        num_anchors: number of anchors to be predicted\n    \"\"\"\n\n    def __init__(self, in_channels, num_anchors):\n        super(RPNHead, self).__init__()\n        # 3x3 滑动窗口\n        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)\n        # 计算预测的目标分数（这里的目标只是指前景或者背景）\n        self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)\n        # 计算预测的目标bbox regression参数\n        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1, stride=1)\n\n        for layer in self.children():\n            if isinstance(layer, nn.Conv2d):\n                torch.nn.init.normal_(layer.weight, std=0.01)\n                torch.nn.init.constant_(layer.bias, 0)\n\n    def forward(self, x):\n        # type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]\n        logits = []\n        bbox_reg = []\n        for i, feature in enumerate(x):\n            t = F.relu(self.conv(feature))\n            logits.append(self.cls_logits(t))\n            bbox_reg.append(self.bbox_pred(t))\n        return logits, bbox_reg\n\n\ndef permute_and_flatten(layer, N, A, C, H, 
W):\n    # type: (Tensor, int, int, int, int, int) -> Tensor\n    \"\"\"\n    调整tensor顺序，并进行reshape\n    Args:\n        layer: 预测特征层上预测的目标概率或bboxes regression参数\n        N: batch_size\n        A: anchors_num_per_position\n        C: classes_num or 4(bbox coordinate)\n        H: height\n        W: width\n\n    Returns:\n        layer: 调整tensor顺序，并reshape后的结果[N, -1, C]\n    \"\"\"\n    # view和reshape功能是一样的，先展平所有元素在按照给定shape排列\n    # view函数只能用于内存中连续存储的tensor，permute等操作会使tensor在内存中变得不再连续，此时就不能再调用view函数\n    # reshape则不需要依赖目标tensor是否在内存中是连续的\n    # [batch_size, anchors_num_per_position * (C or 4), height, width]\n    layer = layer.view(N, -1, C,  H, W)\n    # 调换tensor维度\n    layer = layer.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]\n    layer = layer.reshape(N, -1, C)\n    return layer\n\n\ndef concat_box_prediction_layers(box_cls, box_regression):\n    # type: (List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n    \"\"\"\n    对box_cla和box_regression两个list中的每个预测特征层的预测信息\n    的tensor排列顺序以及shape进行调整 -> [N, -1, C]\n    Args:\n        box_cls: 每个预测特征层上的预测目标概率\n        box_regression: 每个预测特征层上的预测目标bboxes regression参数\n\n    Returns:\n\n    \"\"\"\n    box_cls_flattened = []\n    box_regression_flattened = []\n\n    # 遍历每个预测特征层\n    for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):\n        # [batch_size, anchors_num_per_position * classes_num, height, width]\n        # 注意，当计算RPN中的proposal时，classes_num=1,只区分目标和背景\n        N, AxC, H, W = box_cls_per_level.shape\n        # # [batch_size, anchors_num_per_position * 4, height, width]\n        Ax4 = box_regression_per_level.shape[1]\n        # anchors_num_per_position\n        A = Ax4 // 4\n        # classes_num\n        C = AxC // A\n\n        # [N, -1, C]\n        box_cls_per_level = permute_and_flatten(box_cls_per_level, N, A, C, H, W)\n        box_cls_flattened.append(box_cls_per_level)\n\n        # [N, -1, C]\n        box_regression_per_level = permute_and_flatten(box_regression_per_level, N, 
A, 4, H, W)\n        box_regression_flattened.append(box_regression_per_level)\n\n    box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2)  # start_dim, end_dim\n    box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4)\n    return box_cls, box_regression\n\n\nclass RegionProposalNetwork(torch.nn.Module):\n    \"\"\"\n    Implements Region Proposal Network (RPN).\n\n    Arguments:\n        anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature\n            maps.\n        head (nn.Module): module that computes the objectness and regression deltas\n        fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be\n            considered as positive during training of the RPN.\n        bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be\n            considered as negative during training of the RPN.\n        batch_size_per_image (int): number of anchors that are sampled during training of the RPN\n            for computing the loss\n        positive_fraction (float): proportion of positive anchors in a mini-batch during training\n            of the RPN\n        pre_nms_top_n (Dict[str]): number of proposals to keep before applying NMS. It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        post_nms_top_n (Dict[str]): number of proposals to keep after applying NMS. 
It should\n            contain two fields: training and testing, to allow for different values depending\n            on training or evaluation\n        nms_thresh (float): NMS threshold used for postprocessing the RPN proposals\n\n    \"\"\"\n    __annotations__ = {\n        'box_coder': det_utils.BoxCoder,\n        'proposal_matcher': det_utils.Matcher,\n        'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,\n        'pre_nms_top_n': Dict[str, int],\n        'post_nms_top_n': Dict[str, int],\n    }\n\n    def __init__(self, anchor_generator, head,\n                 fg_iou_thresh, bg_iou_thresh,\n                 batch_size_per_image, positive_fraction,\n                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):\n        super(RegionProposalNetwork, self).__init__()\n        self.anchor_generator = anchor_generator\n        self.head = head\n        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))\n\n        # use during training\n        # 计算anchors与真实bbox的iou\n        self.box_similarity = box_ops.box_iou\n\n        self.proposal_matcher = det_utils.Matcher(\n            fg_iou_thresh,  # 当iou大于fg_iou_thresh(0.7)时视为正样本\n            bg_iou_thresh,  # 当iou小于bg_iou_thresh(0.3)时视为负样本\n            allow_low_quality_matches=True\n        )\n\n        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(\n            batch_size_per_image, positive_fraction  # 256, 0.5\n        )\n\n        # use during testing\n        self._pre_nms_top_n = pre_nms_top_n\n        self._post_nms_top_n = post_nms_top_n\n        self.nms_thresh = nms_thresh\n        self.score_thresh = score_thresh\n        self.min_size = 1.\n\n    def pre_nms_top_n(self):\n        if self.training:\n            return self._pre_nms_top_n['training']\n        return self._pre_nms_top_n['testing']\n\n    def post_nms_top_n(self):\n        if self.training:\n            return self._post_nms_top_n['training']\n        return 
self._post_nms_top_n['testing']\n\n    def assign_targets_to_anchors(self, anchors, targets):\n        # type: (List[Tensor], List[Dict[str, Tensor]]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        计算每个anchors最匹配的gt，并划分为正样本，背景以及废弃的样本\n        Args：\n            anchors: (List[Tensor])\n            targets: (List[Dict[Tensor])\n        Returns:\n            labels: 标记anchors归属类别（1, 0, -1分别对应正样本，背景，废弃的样本）\n                    注意，在RPN中只有前景和背景，所有正样本的类别都是1，0代表背景\n            matched_gt_boxes：与anchors匹配的gt\n        \"\"\"\n        labels = []\n        matched_gt_boxes = []\n        # 遍历每张图像的anchors和targets\n        for anchors_per_image, targets_per_image in zip(anchors, targets):\n            gt_boxes = targets_per_image[\"boxes\"]\n            if gt_boxes.numel() == 0:\n                device = anchors_per_image.device\n                matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)\n                labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)\n            else:\n                # 计算anchors与真实bbox的iou信息\n                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands\n                match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)\n                # 计算每个anchors与gt匹配iou最大的索引（如果iou<0.3索引置为-1，0.3<iou<0.7索引为-2）\n                matched_idxs = self.proposal_matcher(match_quality_matrix)\n                # get the targets corresponding GT for each proposal\n                # NB: need to clamp the indices because we can have a single\n                # GT in the image, and matched_idxs can be -2, which goes\n                # out of bounds\n                matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]\n\n                labels_per_image = matched_idxs >= 0\n                labels_per_image = labels_per_image.to(dtype=torch.float32)\n\n                # background (negative 
examples)\n                bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD  # -1\n                labels_per_image[bg_indices] = 0.0\n\n                # discard indices that are between thresholds\n                inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS  # -2\n                labels_per_image[inds_to_discard] = -1.0\n\n            labels.append(labels_per_image)\n            matched_gt_boxes.append(matched_gt_boxes_per_image)\n        return labels, matched_gt_boxes\n\n    def _get_top_n_idx(self, objectness, num_anchors_per_level):\n        # type: (Tensor, List[int]) -> Tensor\n        \"\"\"\n        获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        Args:\n            objectness: Tensor(每张图像的预测目标概率信息 )\n            num_anchors_per_level: List（每个预测特征层上的预测的anchors个数）\n        Returns:\n\n        \"\"\"\n        r = []  # 记录每个预测特征层上预测目标概率前pre_nms_top_n的索引信息\n        offset = 0\n        # 遍历每个预测特征层上的预测目标概率信息\n        for ob in objectness.split(num_anchors_per_level, 1):\n            if torchvision._is_tracing():\n                num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())\n            else:\n                num_anchors = ob.shape[1]  # 预测特征层上的预测的anchors个数\n                pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)\n\n            # Returns the k largest elements of the given input tensor along a given dimension\n            _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)\n            r.append(top_n_idx + offset)\n            offset += num_anchors\n        return torch.cat(r, dim=1)\n\n    def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):\n        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]\n        \"\"\"\n        筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        Args:\n            proposals: 预测的bbox坐标\n            objectness: 预测的目标概率\n            
image_shapes: batch中每张图片的size信息\n            num_anchors_per_level: 每个预测特征层上预测anchors的数目\n\n        Returns:\n\n        \"\"\"\n        num_images = proposals.shape[0]\n        device = proposals.device\n\n        # do not backprop throught objectness\n        objectness = objectness.detach()\n        objectness = objectness.reshape(num_images, -1)\n\n        # Returns a tensor of size size filled with fill_value\n        # levels负责记录分隔不同预测特征层上的anchors索引信息\n        levels = [torch.full((n, ), idx, dtype=torch.int64, device=device)\n                  for idx, n in enumerate(num_anchors_per_level)]\n        levels = torch.cat(levels, 0)\n\n        # Expand this tensor to the same size as objectness\n        levels = levels.reshape(1, -1).expand_as(objectness)\n\n        # select top_n boxes independently per level before applying nms\n        # 获取每张预测特征图上预测概率排前pre_nms_top_n的anchors索引值\n        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)\n\n        image_range = torch.arange(num_images, device=device)\n        batch_idx = image_range[:, None]  # [batch_size, 1]\n\n        # 根据每个预测特征层预测概率排前pre_nms_top_n的anchors索引值获取相应概率信息\n        objectness = objectness[batch_idx, top_n_idx]\n        levels = levels[batch_idx, top_n_idx]\n        # 预测概率排前pre_nms_top_n的anchors索引值获取相应bbox坐标信息\n        proposals = proposals[batch_idx, top_n_idx]\n\n        objectness_prob = torch.sigmoid(objectness)\n\n        final_boxes = []\n        final_scores = []\n        # 遍历每张图像的相关预测信息\n        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):\n            # 调整预测的boxes信息，将越界的坐标调整到图片边界上\n            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)\n\n            # 返回boxes满足宽，高都大于min_size的索引\n            keep = box_ops.remove_small_boxes(boxes, self.min_size)\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # 移除小概率boxes，参考下面这个链接\n            # https://github.com/pytorch/vision/pull/3205\n     
       keep = torch.where(torch.ge(scores, self.score_thresh))[0]  # ge: >=\n            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]\n\n            # non-maximum suppression, independently done per level\n            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)\n\n            # keep only topk scoring predictions\n            keep = keep[: self.post_nms_top_n()]\n            boxes, scores = boxes[keep], scores[keep]\n\n            final_boxes.append(boxes)\n            final_scores.append(scores)\n        return final_boxes, final_scores\n\n    def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):\n        # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]\n        \"\"\"\n        计算RPN损失，包括类别损失（前景与背景），bbox regression损失\n        Arguments:\n            objectness (Tensor)：预测的前景概率\n            pred_bbox_deltas (Tensor)：预测的bbox regression\n            labels (List[Tensor])：真实的标签 1, 0, -1（batch中每一张图片的labels对应List的一个元素中）\n            regression_targets (List[Tensor])：真实的bbox regression\n\n        Returns:\n            objectness_loss (Tensor) : 类别损失\n            box_loss (Tensor)：边界框回归损失\n        \"\"\"\n        # 按照给定的batch_size_per_image, positive_fraction选择正负样本\n        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)\n        # 将一个batch中的所有正负样本List(Tensor)分别拼接在一起，并获取非零位置的索引\n        # sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)\n        sampled_pos_inds = torch.where(torch.cat(sampled_pos_inds, dim=0))[0]\n        # sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)\n        sampled_neg_inds = torch.where(torch.cat(sampled_neg_inds, dim=0))[0]\n\n        # 将所有正负样本索引拼接在一起\n        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)\n        objectness = objectness.flatten()\n\n        labels = torch.cat(labels, dim=0)\n        regression_targets = torch.cat(regression_targets, 
dim=0)\n\n        # 计算边界框回归损失\n        box_loss = det_utils.smooth_l1_loss(\n            pred_bbox_deltas[sampled_pos_inds],\n            regression_targets[sampled_pos_inds],\n            beta=1 / 9,\n            size_average=False,\n        ) / (sampled_inds.numel())\n\n        # 计算目标预测概率损失\n        objectness_loss = F.binary_cross_entropy_with_logits(\n            objectness[sampled_inds], labels[sampled_inds]\n        )\n\n        return objectness_loss, box_loss\n\n    def forward(self,\n                images,        # type: ImageList\n                features,      # type: Dict[str, Tensor]\n                targets=None   # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[List[Tensor], Dict[str, Tensor]]\n        \"\"\"\n        Arguments:\n            images (ImageList): images for which we want to compute the predictions\n            features (Dict[Tensor]): features computed from the images that are\n                used for computing the predictions. Each tensor in the list\n                correspond to different feature levels\n            targets (List[Dict[Tensor]): ground-truth boxes present in the image (optional).\n                If provided, each element in the dict should contain a field `boxes`,\n                with the locations of the ground-truth boxes.\n\n        Returns:\n            boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per\n                image.\n            losses (Dict[Tensor]): the losses for the model during training. 
During\n                testing, it is an empty dict.\n        \"\"\"\n        # RPN uses all feature maps that are available\n        # features是所有预测特征层组成的OrderedDict\n        features = list(features.values())\n\n        # 计算每个预测特征层上的预测目标概率和bboxes regression参数\n        # objectness和pred_bbox_deltas都是list\n        objectness, pred_bbox_deltas = self.head(features)\n\n        # 生成一个batch图像的所有anchors信息,list(tensor)元素个数等于batch_size\n        anchors = self.anchor_generator(images, features)\n\n        # batch_size\n        num_images = len(anchors)\n\n        # numel() Returns the total number of elements in the input tensor.\n        # 计算每个预测特征层上的对应的anchors数量\n        num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]\n        num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]\n\n        # 调整内部tensor格式以及shape\n        objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness,\n                                                                    pred_bbox_deltas)\n\n        # apply pred_bbox_deltas to anchors to obtain the decoded proposals\n        # note that we detach the deltas because Faster R-CNN do not backprop through\n        # the proposals\n        # 将预测的bbox regression参数应用到anchors上得到最终预测bbox坐标\n        proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)\n        proposals = proposals.view(num_images, -1, 4)\n\n        # 筛除小boxes框，nms处理，根据预测概率获取前post_nms_top_n个目标\n        boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)\n\n        losses = {}\n        if self.training:\n            assert targets is not None\n            # 计算每个anchors最匹配的gt，并将anchors进行分类，前景，背景以及废弃的anchors\n            labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)\n            # 结合anchors以及对应的gt，计算regression参数\n            regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)\n            loss_objectness, 
loss_rpn_box_reg = self.compute_loss(\n                objectness, pred_bbox_deltas, labels, regression_targets\n            )\n            losses = {\n                \"loss_objectness\": loss_objectness,\n                \"loss_rpn_box_reg\": loss_rpn_box_reg\n            }\n        return boxes, losses\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/network_files/transform.py",
    "content": "import math\nfrom typing import List, Tuple, Dict, Optional\n\nimport torch\nfrom torch import nn, Tensor\nimport torchvision\n\nfrom .image_list import ImageList\n\n\n@torch.jit.unused\ndef _resize_image_onnx(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    from torch.onnx import operators\n    im_shape = operators.shape_as_tensor(image)[-2:]\n    min_size = torch.min(im_shape).to(dtype=torch.float32)\n    max_size = torch.max(im_shape).to(dtype=torch.float32)\n    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)\n\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\ndef _resize_image(image, self_min_size, self_max_size):\n    # type: (Tensor, float, float) -> Tensor\n    im_shape = torch.tensor(image.shape[-2:])\n    min_size = float(torch.min(im_shape))    # 获取高宽中的最小值\n    max_size = float(torch.max(im_shape))    # 获取高宽中的最大值\n    scale_factor = self_min_size / min_size  # 根据指定最小边长和图片最小边长计算缩放比例\n\n    # 如果使用该缩放比例计算的图片最大边长大于指定的最大边长\n    if max_size * scale_factor > self_max_size:\n        scale_factor = self_max_size / max_size  # 将缩放比例设为指定最大边长和图片最大边长之比\n\n    # interpolate利用插值的方法缩放图片\n    # image[None]操作是在最前面添加batch维度[C, H, W] -> [1, C, H, W]\n    # bilinear只支持4D Tensor\n    image = torch.nn.functional.interpolate(\n        image[None], scale_factor=scale_factor, mode=\"bilinear\", recompute_scale_factor=True,\n        align_corners=False)[0]\n\n    return image\n\n\nclass GeneralizedRCNNTransform(nn.Module):\n    \"\"\"\n    Performs input / target transformation before feeding the data to a GeneralizedRCNN\n    model.\n\n    The transformations it perform are:\n        - input normalization (mean subtraction and std division)\n        - input / target resizing to match min_size / max_size\n\n    It returns a ImageList for the inputs, 
and a List[Dict[Tensor]] for the targets\n    \"\"\"\n\n    def __init__(self, min_size, max_size, image_mean, image_std):\n        super(GeneralizedRCNNTransform, self).__init__()\n        if not isinstance(min_size, (list, tuple)):\n            min_size = (min_size,)\n        self.min_size = min_size      # 指定图像的最小边长范围\n        self.max_size = max_size      # 指定图像的最大边长范围\n        self.image_mean = image_mean  # 指定图像在标准化处理中的均值\n        self.image_std = image_std    # 指定图像在标准化处理中的方差\n\n    def normalize(self, image):\n        \"\"\"标准化处理\"\"\"\n        dtype, device = image.dtype, image.device\n        mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)\n        std = torch.as_tensor(self.image_std, dtype=dtype, device=device)\n        # [:, None, None]: shape [3] -> [3, 1, 1]\n        return (image - mean[:, None, None]) / std[:, None, None]\n\n    def torch_choice(self, k):\n        # type: (List[int]) -> int\n        \"\"\"\n        Implements `random.choice` via torch ops so it can be compiled with\n        TorchScript. 
Remove if https://github.com/pytorch/pytorch/issues/25803\n        is fixed.\n        \"\"\"\n        index = int(torch.empty(1).uniform_(0., float(len(k))).item())\n        return k[index]\n\n    def resize(self, image, target):\n        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]\n        \"\"\"\n        将图片缩放到指定的大小范围内，并对应缩放bboxes信息\n        Args:\n            image: 输入的图片\n            target: 输入图片的相关信息（包括bboxes信息）\n\n        Returns:\n            image: 缩放后的图片\n            target: 缩放bboxes后的图片相关信息\n        \"\"\"\n        # image shape is [channel, height, width]\n        h, w = image.shape[-2:]\n\n        if self.training:\n            size = float(self.torch_choice(self.min_size))  # 指定输入图片的最小边长,注意是self.min_size不是min_size\n        else:\n            # FIXME assume for now that testing uses the largest scale\n            size = float(self.min_size[-1])    # 指定输入图片的最小边长,注意是self.min_size不是min_size\n\n        if torchvision._is_tracing():\n            image = _resize_image_onnx(image, size, float(self.max_size))\n        else:\n            image = _resize_image(image, size, float(self.max_size))\n\n        if target is None:\n            return image, target\n\n        bbox = target[\"boxes\"]\n        # 根据图像的缩放比例来缩放bbox\n        bbox = resize_boxes(bbox, [h, w], image.shape[-2:])\n        target[\"boxes\"] = bbox\n\n        return image, target\n\n    # _onnx_batch_images() is an implementation of\n    # batch_images() that is supported by ONNX tracing.\n    @torch.jit.unused\n    def _onnx_batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        max_size = []\n        for i in range(images[0].dim()):\n            max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)\n            max_size.append(max_size_i)\n        stride = size_divisible\n        max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * 
stride).to(torch.int64)\n        max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)\n        max_size = tuple(max_size)\n\n        # work around for\n        # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n        # which is not yet supported in onnx\n        padded_imgs = []\n        for img in images:\n            padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]\n            padded_img = torch.nn.functional.pad(img, [0, padding[2], 0, padding[1], 0, padding[0]])\n            padded_imgs.append(padded_img)\n\n        return torch.stack(padded_imgs)\n\n    def max_by_axis(self, the_list):\n        # type: (List[List[int]]) -> List[int]\n        maxes = the_list[0]\n        for sublist in the_list[1:]:\n            for index, item in enumerate(sublist):\n                maxes[index] = max(maxes[index], item)\n        return maxes\n\n    def batch_images(self, images, size_divisible=32):\n        # type: (List[Tensor], int) -> Tensor\n        \"\"\"\n        将一批图像打包成一个batch返回（注意batch中每个tensor的shape是相同的）\n        Args:\n            images: 输入的一批图片\n            size_divisible: 将图像高和宽调整到该数的整数倍\n\n        Returns:\n            batched_imgs: 打包成一个batch后的tensor数据\n        \"\"\"\n\n        if torchvision._is_tracing():\n            # batch_images() does not export well to ONNX\n            # call _onnx_batch_images() instead\n            return self._onnx_batch_images(images, size_divisible)\n\n        # 分别计算一个batch中所有图片中的最大channel, height, width\n        max_size = self.max_by_axis([list(img.shape) for img in images])\n\n        stride = float(size_divisible)\n        # max_size = list(max_size)\n        # 将height向上调整到stride的整数倍\n        max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)\n        # 将width向上调整到stride的整数倍\n        max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)\n\n        # [batch, channel, height, width]\n        batch_shape = 
[len(images)] + max_size\n\n        # 创建shape为batch_shape且值全部为0的tensor\n        batched_imgs = images[0].new_full(batch_shape, 0)\n        for img, pad_img in zip(images, batched_imgs):\n            # 将输入images中的每张图片复制到新的batched_imgs的每张图片中，对齐左上角，保证bboxes的坐标不变\n            # 这样保证输入到网络中一个batch的每张图片的shape相同\n            # copy_: Copies the elements from src into self tensor and returns self\n            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)\n\n        return batched_imgs\n\n    def postprocess(self,\n                    result,                # type: List[Dict[str, Tensor]]\n                    image_shapes,          # type: List[Tuple[int, int]]\n                    original_image_sizes   # type: List[Tuple[int, int]]\n                    ):\n        # type: (...) -> List[Dict[str, Tensor]]\n        \"\"\"\n        对网络的预测结果进行后处理（主要将bboxes还原到原图像尺度上）\n        Args:\n            result: list(dict), 网络的预测结果, len(result) == batch_size\n            image_shapes: list(torch.Size), 图像预处理缩放后的尺寸, len(image_shapes) == batch_size\n            original_image_sizes: list(torch.Size), 图像的原始尺寸, len(original_image_sizes) == batch_size\n\n        Returns:\n\n        \"\"\"\n        if self.training:\n            return result\n\n        # 遍历每张图片的预测信息，将boxes信息还原回原尺度\n        for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):\n            boxes = pred[\"boxes\"]\n            boxes = resize_boxes(boxes, im_s, o_im_s)  # 将bboxes缩放回原图像尺度上\n            result[i][\"boxes\"] = boxes\n        return result\n\n    def __repr__(self):\n        \"\"\"自定义输出实例化对象的信息，可通过print打印实例信息\"\"\"\n        format_string = self.__class__.__name__ + '('\n        _indent = '\\n    '\n        format_string += \"{0}Normalize(mean={1}, std={2})\".format(_indent, self.image_mean, self.image_std)\n        format_string += \"{0}Resize(min_size={1}, max_size={2}, mode='bilinear')\".format(_indent, self.min_size,\n                                       
                                                  self.max_size)\n        format_string += '\\n)'\n        return format_string\n\n    def forward(self,\n                images,       # type: List[Tensor]\n                targets=None  # type: Optional[List[Dict[str, Tensor]]]\n                ):\n        # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]\n        images = [img for img in images]\n        for i in range(len(images)):\n            image = images[i]\n            target_index = targets[i] if targets is not None else None\n\n            if image.dim() != 3:\n                raise ValueError(\"images is expected to be a list of 3d tensors \"\n                                 \"of shape [C, H, W], got {}\".format(image.shape))\n            image = self.normalize(image)                # 对图像进行标准化处理\n            image, target_index = self.resize(image, target_index)   # 对图像和对应的bboxes缩放到指定范围\n            images[i] = image\n            if targets is not None and target_index is not None:\n                targets[i] = target_index\n\n        # 记录resize后的图像尺寸\n        image_sizes = [img.shape[-2:] for img in images]\n        images = self.batch_images(images)  # 将images打包成一个batch\n        image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])\n\n        for image_size in image_sizes:\n            assert len(image_size) == 2\n            image_sizes_list.append((image_size[0], image_size[1]))\n\n        image_list = ImageList(images, image_sizes_list)\n        return image_list, targets\n\n\ndef resize_boxes(boxes, original_size, new_size):\n    # type: (Tensor, List[int], List[int]) -> Tensor\n    \"\"\"\n    将boxes参数根据图像的缩放情况进行相应缩放\n\n    Arguments:\n        original_size: 图像缩放前的尺寸\n        new_size: 图像缩放后的尺寸\n    \"\"\"\n    ratios = [\n        torch.tensor(s, dtype=torch.float32, device=boxes.device) /\n        torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)\n        for s, s_orig in zip(new_size, original_size)\n   
 ]\n    ratios_height, ratios_width = ratios\n    # Removes a tensor dimension, boxes [minibatch, 4]\n    # Returns a tuple of all slices along a given dimension, already without it.\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    xmin = xmin * ratios_width\n    xmax = xmax * ratios_width\n    ymin = ymin * ratios_height\n    ymax = ymax * ratios_height\n    return torch.stack((xmin, ymin, xmax, ymax), dim=1)\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/plot_curve.py",
    "content": "import datetime\nimport matplotlib.pyplot as plt\n\n\ndef plot_loss_and_lr(train_loss, learning_rate):\n    try:\n        x = list(range(len(train_loss)))\n        fig, ax1 = plt.subplots(1, 1)\n        ax1.plot(x, train_loss, 'r', label='loss')\n        ax1.set_xlabel(\"step\")\n        ax1.set_ylabel(\"loss\")\n        ax1.set_title(\"Train Loss and lr\")\n        plt.legend(loc='best')\n\n        ax2 = ax1.twinx()\n        ax2.plot(x, learning_rate, label='lr')\n        ax2.set_ylabel(\"learning rate\")\n        ax2.set_xlim(0, len(train_loss))  # 设置横坐标整数间隔\n        plt.legend(loc='best')\n\n        handles1, labels1 = ax1.get_legend_handles_labels()\n        handles2, labels2 = ax2.get_legend_handles_labels()\n        plt.legend(handles1 + handles2, labels1 + labels2, loc='upper right')\n\n        fig.subplots_adjust(right=0.8)  # 防止出现保存图片显示不全的情况\n        fig.savefig('./loss_and_lr{}.png'.format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")))\n        plt.close()\n        print(\"successful save loss curve! \")\n    except Exception as e:\n        print(e)\n\n\ndef plot_map(mAP):\n    try:\n        x = list(range(len(mAP)))\n        plt.plot(x, mAP, label='mAp')\n        plt.xlabel('epoch')\n        plt.ylabel('mAP')\n        plt.title('Eval mAP')\n        plt.xlim(0, len(mAP))\n        plt.legend(loc='best')\n        plt.savefig('./mAP.png')\n        plt.close()\n        print(\"successful save mAP curve!\")\n    except Exception as e:\n        print(e)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nimport torchvision\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nfrom torchvision import transforms\nfrom torchvision.models.feature_extraction import create_feature_extractor\n\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom backbone import vgg, MobileNetV2, resnet50\nfrom draw_box_utils import draw_objs\n\n\ndef create_model(num_classes):\n    res50 = resnet50()\n    backbone = create_feature_extractor(res50, return_nodes={\"layer3\": \"0\"})\n    backbone.out_channels = 1024\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行roi pooling\n                                                    output_size=[7, 7],  # roi_pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    num_classes = 90  # 不包含背景\n    model = create_model(num_classes=num_classes + 1)\n\n    # load train weights\n    weights_path = \"./save_weights/model_25.pth\"\n    assert os.path.exists(weights_path), \"{} file dose not exist.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    
model.to(device)\n\n    # read class_indict\n    label_json_path = './coco91_indices.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        category_index = json.load(f)\n\n    # load image\n    original_img = Image.open(\"./test.jpg\")\n\n    # from pil image to tensor, do not normalize image\n    data_transform = transforms.Compose([transforms.ToTensor()])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        predictions = model(img.to(device))[0]\n        t_end = time_synchronized()\n        print(\"inference+NMS time: {}\".format(t_end - t_start))\n\n        predict_boxes = predictions[\"boxes\"].to(\"cpu\").numpy()\n        predict_classes = predictions[\"labels\"].to(\"cpu\").numpy()\n        predict_scores = predictions[\"scores\"].to(\"cpu\").numpy()\n\n        if len(predict_boxes) == 0:\n            print(\"没有检测到任何目标!\")\n\n        plot_img = draw_objs(original_img,\n                             predict_boxes,\n                             predict_classes,\n                             predict_scores,\n                             category_index=category_index,\n                             box_thresh=0.5,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/requirements.txt",
    "content": "lxml\nmatplotlib\nnumpy\ntqdm\npycocotools\nPillow\ntorch==1.10\ntorchvision==0.11.1\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/results20220408-201436.txt",
    "content": "epoch:0 0.0504  0.1144  0.0362  0.0207  0.0601  0.0657  0.0702  0.1069  0.1087  0.0335  0.1153  0.1486  1.7430  0.005000\nepoch:1 0.1138  0.2300  0.0994  0.0494  0.1279  0.1554  0.1303  0.1940  0.1980  0.0747  0.2051  0.2831  1.2282  0.005000\nepoch:2 0.1461  0.2773  0.1394  0.0636  0.1635  0.1997  0.1530  0.2243  0.2288  0.0938  0.2435  0.3309  1.1391  0.005000\nepoch:3 0.1669  0.3134  0.1642  0.0750  0.1843  0.2282  0.1680  0.2509  0.2561  0.1091  0.2705  0.3701  1.0902  0.005000\nepoch:4 0.1857  0.3389  0.1828  0.0829  0.2074  0.2568  0.1830  0.2708  0.2756  0.1140  0.2937  0.3998  1.0581  0.005000\nepoch:5 0.1908  0.3431  0.1930  0.0901  0.2128  0.2578  0.1839  0.2704  0.2753  0.1197  0.2927  0.3893  1.0337  0.005000\nepoch:6 0.2044  0.3634  0.2077  0.0954  0.2247  0.2796  0.1947  0.2893  0.2956  0.1317  0.3138  0.4178  1.0127  0.005000\nepoch:7 0.2068  0.3651  0.2099  0.0953  0.2269  0.2840  0.1959  0.2869  0.2926  0.1290  0.3093  0.4186  0.9945  0.005000\nepoch:8 0.2171  0.3788  0.2218  0.0996  0.2470  0.2969  0.2012  0.3001  0.3071  0.1329  0.3375  0.4371  0.9806  0.005000\nepoch:9 0.2146  0.3717  0.2207  0.0946  0.2315  0.3038  0.2011  0.2910  0.2962  0.1277  0.3091  0.4321  0.9691  0.005000\nepoch:10 0.2280  0.3974  0.2345  0.1035  0.2535  0.3108  0.2118  0.3119  0.3182  0.1402  0.3429  0.4537  0.9567  0.005000\nepoch:11 0.2332  0.3983  0.2443  0.1111  0.2534  0.3149  0.2136  0.3128  0.3190  0.1515  0.3417  0.4438  0.9450  0.005000\nepoch:12 0.2400  0.4094  0.2486  0.1102  0.2622  0.3251  0.2175  0.3214  0.3289  0.1507  0.3521  0.4588  0.9369  0.005000\nepoch:13 0.2449  0.4152  0.2563  0.1121  0.2741  0.3308  0.2234  0.3286  0.3363  0.1552  0.3703  0.4627  0.9286  0.005000\nepoch:14 0.2466  0.4192  0.2542  0.1131  0.2765  0.3412  0.2220  0.3258  0.3322  0.1481  0.3627  0.4776  0.9203  0.005000\nepoch:15 0.2492  0.4216  0.2569  0.1147  0.2781  0.3417  0.2254  0.3337  0.3402  0.1565  0.3666  0.4893  0.9116  0.005000\nepoch:16 0.2689  0.4433  
0.2814  0.1246  0.2963  0.3705  0.2384  0.3495  0.3569  0.1671  0.3864  0.5046  0.8616  0.000500\nepoch:17 0.2719  0.4473  0.2865  0.1243  0.3021  0.3743  0.2399  0.3519  0.3593  0.1669  0.3931  0.5017  0.8515  0.000500\nepoch:18 0.2738  0.4521  0.2857  0.1256  0.3048  0.3718  0.2416  0.3564  0.3645  0.1713  0.3996  0.5037  0.8472  0.000500\nepoch:19 0.2759  0.4534  0.2893  0.1259  0.3094  0.3719  0.2448  0.3603  0.3681  0.1691  0.4073  0.5055  0.8439  0.000500\nepoch:20 0.2720  0.4483  0.2838  0.1250  0.3021  0.3681  0.2400  0.3532  0.3613  0.1688  0.3944  0.4994  0.8417  0.000500\nepoch:21 0.2748  0.4501  0.2904  0.1241  0.3019  0.3759  0.2421  0.3561  0.3641  0.1682  0.3941  0.5101  0.8378  0.000500\nepoch:22 0.2754  0.4532  0.2896  0.1281  0.3064  0.3759  0.2419  0.3586  0.3660  0.1712  0.3993  0.5115  0.8304  0.000050\nepoch:23 0.2757  0.4516  0.2907  0.1271  0.3068  0.3748  0.2423  0.3572  0.3650  0.1692  0.4005  0.5087  0.8307  0.000050\nepoch:24 0.2750  0.4500  0.2888  0.1256  0.3017  0.3760  0.2411  0.3536  0.3611  0.1669  0.3894  0.5040  0.8299  0.000050\nepoch:25 0.2769  0.4537  0.2903  0.1263  0.3082  0.3782  0.2424  0.3582  0.3663  0.1693  0.4020  0.5116  0.8281  0.000050\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train.py",
    "content": "import os\nimport datetime\n\nimport torch\nimport torchvision\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom backbone import MobileNetV2, vgg, resnet50\nfrom my_dataset import CocoDetection\nfrom train_utils import train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups\nfrom torchvision.models.feature_extraction import create_feature_extractor\n\n\ndef create_model(num_classes):\n    # 以vgg16为backbone\n    # 预训练权重地址： https://download.pytorch.org/models/vgg16-397923af.pth\n    # vgg16 = vgg(model_name=\"vgg16\", weights_path=\"./vgg16.pth\")\n    # backbone = create_feature_extractor(vgg16, return_nodes={\"features.29\": \"0\"})  # 删除feature中最后的maxpool层\n    # backbone.out_channels = 512\n\n    # 以resnet50为backbone\n    # 预训练权重地址：https://download.pytorch.org/models/resnet50-19c8e357.pth\n    res50 = resnet50()\n    res50.load_state_dict(torch.load(\"./resnet50.pth\", map_location=\"cpu\"))\n    backbone = create_feature_extractor(res50, return_nodes={\"layer3\": \"0\"})\n    backbone.out_channels = 1024\n\n    # 以mobilenetv2为backbone\n    # 预训练权重地址：https://download.pytorch.org/models/mobilenet_v2-b0353104.pth\n    # backbone = MobileNetV2(weights_path=\"./mobilenet_v2.pth\").features\n    # backbone.out_channels = 1280  # 设置对应backbone输出特征矩阵的channels\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行roi pooling\n                                                    output_size=[7, 7],   # roi_pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       
box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    COCO_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> instances_train2017.json\n    train_dataset = CocoDetection(COCO_root, \"train\", data_transform[\"train\"])\n    train_sampler = None\n\n    # 是否按图片相似高宽比采样图片组成batch\n    # 使用的话能够减小训练时所需GPU显存，默认使用\n    if args.aspect_ratio_group_factor >= 0:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        # 统计所有图像高宽比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        # 每个batch图片从同一高宽比例区间中取\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = args.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    if train_sampler:\n        # 如果按照图片高宽比采样图片，dataloader中需要使用batch_sampler\n        train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_sampler=train_batch_sampler,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n    else:\n        
train_data_loader = torch.utils.data.DataLoader(train_dataset,\n                                                        batch_size=batch_size,\n                                                        shuffle=True,\n                                                        pin_memory=True,\n                                                        num_workers=nw,\n                                                        collate_fn=train_dataset.collate_fn)\n\n    # load validation data set\n    # coco2017 -> annotations -> instances_val2017.json\n    val_dataset = CocoDetection(COCO_root, \"val\", data_transform[\"val\"])\n    val_data_loader = torch.utils.data.DataLoader(val_dataset,\n                                                  batch_size=1,\n                                                  shuffle=False,\n                                                  pin_memory=True,\n                                                  num_workers=nw,\n                                                  collate_fn=train_dataset.collate_fn)\n\n    # create model num_classes equal background + classes\n    model = create_model(num_classes=args.num_classes + 1)\n    # print(model)\n\n    model.to(device)\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    # define optimizer\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(params, lr=args.lr,\n                                momentum=args.momentum,\n                                weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # learning rate scheduler\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,\n                                                        milestones=args.lr_steps,\n                                                        gamma=args.lr_gamma)\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the 
module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    for epoch in range(args.start_epoch, args.epochs):\n        # train for one epoch, printing every 50 iterations\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, train_data_loader,\n                                              device, epoch, print_freq=50,\n                                              warmup=True, scaler=scaler)\n        train_loss.append(mean_loss.item())\n        learning_rate.append(lr)\n\n        # update the learning rate\n        lr_scheduler.step()\n\n        # evaluate on the test dataset\n        coco_info = utils.evaluate(model, val_data_loader, device=device)\n\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 写入的数据包括coco指标还有loss和learning rate\n            result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n            txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n            f.write(txt + \"\\n\")\n\n        val_map.append(coco_info[1])  # pascal mAP\n\n        # save weights\n        save_files = {\n            'model': model.state_dict(),\n            'optimizer': optimizer.state_dict(),\n            'lr_scheduler': lr_scheduler.state_dict(),\n            'epoch': epoch}\n        if args.amp:\n            save_files[\"scaler\"] = scaler.state_dict()\n        torch.save(save_files, 
\"./save_weights/model_{}.pth\".format(epoch))\n\n    # plot loss and lr curve\n    if len(train_loss) != 0 and len(learning_rate) != 0:\n        from plot_curve import plot_loss_and_lr\n        plot_loss_and_lr(train_loss, learning_rate)\n\n    # plot mAP curve\n    if len(val_map) != 0:\n        from plot_curve import plot_map\n        plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda:0', help='device')\n    # 训练数据集的根目录\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./save_weights', help='path where to save')\n    # 若需要接着上次训练，则指定上次训练保存权重文件地址\n    parser.add_argument('--resume', default='', type=str, help='resume from checkpoint')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=26, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 学习率\n    parser.add_argument('--lr', default=0.005, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,\n          
              help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练的batch size(如果内存/GPU显存充裕，建议设置更大)\n    parser.add_argument('--batch_size', default=4, type=int, metavar='N',\n                        help='batch size when training.')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n    print(args)\n\n    # 检查保存权重文件夹是否存在，不存在则创建\n    if not os.path.exists(args.output_dir):\n        os.makedirs(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\nimport torchvision\n\nimport transforms\nfrom my_dataset import CocoDetection\nfrom backbone import resnet50\nfrom network_files import FasterRCNN, AnchorsGenerator\nimport train_utils.train_eval_utils as utils\nfrom train_utils import GroupedBatchSampler, create_aspect_ratio_groups, init_distributed_mode, save_on_master, mkdir\nfrom torchvision.models.feature_extraction import create_feature_extractor\n\n\ndef create_model(num_classes):\n    # 以resnet50为backbone\n    # 预训练权重地址：https://download.pytorch.org/models/resnet50-19c8e357.pth\n    res50 = resnet50()\n    res50.load_state_dict(torch.load(\"./resnet50.pth\", map_location=\"cpu\"))\n    backbone = create_feature_extractor(res50, return_nodes={\"layer3\": \"0\"})\n    backbone.out_channels = 1024\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行roi pooling\n                                                    output_size=[7, 7],  # roi_pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=num_classes,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # Data loading code\n    print(\"Loading data\")\n\n    data_transform = {\n        \"train\": transforms.Compose([transforms.ToTensor(),\n                                     transforms.RandomHorizontalFlip(0.5)]),\n        \"val\": 
transforms.Compose([transforms.ToTensor()])\n    }\n\n    COCO_root = args.data_path\n\n    # load train data set\n    # coco2017 -> annotations -> instances_train2017.json\n    train_dataset = CocoDetection(COCO_root, \"train\", data_transform[\"train\"])\n\n    # load validation data set\n    # coco2017 -> annotations -> instances_val2017.json\n    val_dataset = CocoDetection(COCO_root, \"val\", data_transform[\"val\"])\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    if args.aspect_ratio_group_factor >= 0:\n        # 统计所有图像比例在bins区间中的位置索引\n        group_ids = create_aspect_ratio_groups(train_dataset, k=args.aspect_ratio_group_factor)\n        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)\n    else:\n        train_batch_sampler = torch.utils.data.BatchSampler(\n            train_sampler, args.batch_size, drop_last=True)\n\n    data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    data_loader_test = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + classes\n    model = create_model(num_classes=args.num_classes + 1)\n    model.to(device)\n\n    if args.distributed and args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = 
torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params = [p for p in model.parameters() if p.requires_grad]\n    optimizer = torch.optim.SGD(\n        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)\n    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp and \"scaler\" in checkpoint:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    train_loss = []\n    learning_rate = []\n    val_map = []\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = utils.train_one_epoch(model, optimizer, data_loader,\n                                              device, epoch, args.print_freq,\n                                              warmup=True, scaler=scaler)\n\n        # update learning rate\n        lr_scheduler.step()\n\n        # evaluate after every epoch\n        coco_info = utils.evaluate(model, 
data_loader_test, device=device)\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            train_loss.append(mean_loss.item())\n            learning_rate.append(lr)\n            val_map.append(coco_info[1])  # pascal mAP\n\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 写入的数据包括coco指标还有loss和learning rate\n                result_info = [f\"{i:.4f}\" for i in coco_info + [mean_loss.item()]] + [f\"{lr:.6f}\"]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_files = {'model': model_without_ddp.state_dict(),\n                          'optimizer': optimizer.state_dict(),\n                          'lr_scheduler': lr_scheduler.state_dict(),\n                          'args': args,\n                          'epoch': epoch}\n            if args.amp:\n                save_files[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_files,\n                           os.path.join(args.output_dir, f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n    if args.rank in [-1, 0]:\n        # plot loss and lr curve\n        if len(train_loss) != 0 and len(learning_rate) != 0:\n            from plot_curve import plot_loss_and_lr\n            plot_loss_and_lr(train_loss, learning_rate)\n\n        # plot mAP curve\n        if len(val_map) != 0:\n            from plot_curve import plot_map\n            plot_map(val_map)\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(coco2017)\n    parser.add_argument('--data-path', default='/data/coco2017', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', 
help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=90, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=26, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 学习率，这个需要根据gpu的数量以及batch_size进行设置0.02 / 8 * num_GPU\n    parser.add_argument('--lr', default=0.01, type=float,\n                        help='initial learning rate, 0.02 is the default value for training '\n                             'on 8 gpus and 2 images_per_gpu')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 针对torch.optim.lr_scheduler.StepLR的参数\n    parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int,\n                        help='decrease lr every step-size epochs')\n    # 针对torch.optim.lr_scheduler.MultiStepLR的参数\n    parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 
文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--aspect-ratio-group-factor', default=3, type=int)\n\n    # 开启的进程数(注意不是线程)\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    parser.add_argument(\"--sync-bn\", dest=\"sync_bn\", help=\"Use sync batch norm\", type=bool, default=False)\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_utils/__init__.py",
    "content": "from .group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\nfrom .coco_eval import EvalCOCOMetric\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_utils/coco_eval.py",
    "content": "import json\nimport copy\n\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\nimport pycocotools.mask as mask_util\nfrom .distributed_utils import all_gather, is_main_process\n\n\ndef merge(img_ids, eval_results):\n    \"\"\"将多个进程之间的数据汇总在一起\"\"\"\n    all_img_ids = all_gather(img_ids)\n    all_eval_results = all_gather(eval_results)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_results = []\n    for p in all_eval_results:\n        merged_eval_results.extend(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n\n    # keep only unique (and in sorted order) images\n    # 去除重复的图片索引，多GPU训练时为了保证每个进程的训练图片数量相同，可能将一张图片分配给多个进程\n    merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)\n    merged_eval_results = [merged_eval_results[i] for i in idx]\n\n    return list(merged_img_ids), merged_eval_results\n\n\nclass EvalCOCOMetric:\n    def __init__(self,\n                 coco: COCO = None,\n                 iou_type: str = None,\n                 results_file_name: str = \"predict_results.json\",\n                 classes_mapping: dict = None):\n        self.coco = copy.deepcopy(coco)\n        self.img_ids = []  # 记录每个进程处理图片的ids\n        self.results = []\n        self.aggregation_results = None\n        self.classes_mapping = classes_mapping\n        self.coco_evaluator = None\n        assert iou_type in [\"bbox\", \"segm\", \"keypoints\"]\n        self.iou_type = iou_type\n        self.results_file_name = results_file_name\n\n    def prepare_for_coco_detection(self, targets, outputs):\n        \"\"\"将预测的结果转换成COCOeval指定的格式，针对目标检测任务\"\"\"\n        # 遍历每张图像的预测结果\n        for target, output in zip(targets, outputs):\n            if len(output) == 0:\n                continue\n\n            img_id = int(target[\"image_id\"])\n            if img_id in self.img_ids:\n                # 防止出现重复的数据\n                continue\n            
self.img_ids.append(img_id)\n            per_image_boxes = output[\"boxes\"]\n            # 对于coco_eval, 需要的每个box的数据格式为[x_min, y_min, w, h]\n            # 而我们预测的box格式是[x_min, y_min, x_max, y_max]，所以需要转下格式\n            per_image_boxes[:, 2:] -= per_image_boxes[:, :2]\n            per_image_classes = output[\"labels\"].tolist()\n            per_image_scores = output[\"scores\"].tolist()\n\n            res_list = []\n            # 遍历每个目标的信息\n            for object_score, object_class, object_box in zip(\n                    per_image_scores, per_image_classes, per_image_boxes):\n                object_score = float(object_score)\n                class_idx = int(object_class)\n                if self.classes_mapping is not None:\n                    class_idx = int(self.classes_mapping[str(class_idx)])\n                # We recommend rounding coordinates to the nearest tenth of a pixel\n                # to reduce resulting JSON file size.\n                object_box = [round(b, 2) for b in object_box.tolist()]\n\n                res = {\"image_id\": img_id,\n                       \"category_id\": class_idx,\n                       \"bbox\": object_box,\n                       \"score\": round(object_score, 3)}\n                res_list.append(res)\n            self.results.append(res_list)\n\n    def prepare_for_coco_segmentation(self, targets, outputs):\n        \"\"\"将预测的结果转换成COCOeval指定的格式，针对实例分割任务\"\"\"\n        # 遍历每张图像的预测结果\n        for target, output in zip(targets, outputs):\n            if len(output) == 0:\n                continue\n\n            img_id = int(target[\"image_id\"])\n            if img_id in self.img_ids:\n                # 防止出现重复的数据\n                continue\n\n            self.img_ids.append(img_id)\n            per_image_masks = output[\"masks\"]\n            per_image_classes = output[\"labels\"].tolist()\n            per_image_scores = output[\"scores\"].tolist()\n\n            masks = per_image_masks > 0.5\n\n            res_list = []\n  
          # 遍历每个目标的信息\n            for mask, label, score in zip(masks, per_image_classes, per_image_scores):\n                rle = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                rle[\"counts\"] = rle[\"counts\"].decode(\"utf-8\")\n\n                class_idx = int(label)\n                if self.classes_mapping is not None:\n                    class_idx = int(self.classes_mapping[str(class_idx)])\n\n                res = {\"image_id\": img_id,\n                       \"category_id\": class_idx,\n                       \"segmentation\": rle,\n                       \"score\": round(score, 3)}\n                res_list.append(res)\n            self.results.append(res_list)\n\n    def update(self, targets, outputs):\n        if self.iou_type == \"bbox\":\n            self.prepare_for_coco_detection(targets, outputs)\n        elif self.iou_type == \"segm\":\n            self.prepare_for_coco_segmentation(targets, outputs)\n        else:\n            raise KeyError(f\"not support iou_type: {self.iou_type}\")\n\n    def synchronize_results(self):\n        # 同步所有进程中的数据\n        eval_ids, eval_results = merge(self.img_ids, self.results)\n        self.aggregation_results = {\"img_ids\": eval_ids, \"results\": eval_results}\n\n        # 主进程上保存即可\n        if is_main_process():\n            results = []\n            [results.extend(i) for i in eval_results]\n            # write predict results into json file\n            json_str = json.dumps(results, indent=4)\n            with open(self.results_file_name, 'w') as json_file:\n                json_file.write(json_str)\n\n    def evaluate(self):\n        # 只在主进程上评估即可\n        if is_main_process():\n            # accumulate predictions from all images\n            coco_true = self.coco\n            coco_pre = coco_true.loadRes(self.results_file_name)\n\n            self.coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType=self.iou_type)\n\n            
self.coco_evaluator.evaluate()\n            self.coco_evaluator.accumulate()\n            print(f\"IoU metric: {self.iou_type}\")\n            self.coco_evaluator.summarize()\n\n            coco_info = self.coco_evaluator.stats.tolist()  # numpy to list\n            return coco_info\n        else:\n            return None\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    收集各个进程中的数据\n    Run all_gather on arbitrary 
picklable data (not necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()  # 进程数\n    if world_size == 1:\n        return [data]\n\n    data_list = [None] * world_size\n    dist.all_gather_object(data_list, data)\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n    
        type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * 
(len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    
builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k+1等份\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/train_utils/train_eval_utils.py",
    "content": "import math\nimport sys\nimport time\n\nimport torch\n\nimport train_utils.distributed_utils as utils\nfrom .coco_eval import EvalCOCOMetric\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq=50, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n\n    mloss = torch.zeros(1).to(device)  # mean losses\n    for i, [images, targets] in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        images = list(image.to(device) for image in images)\n        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            loss_dict = model(images, targets)\n\n            losses = sum(loss for loss in loss_dict.values())\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict(loss_dict)\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n\n        loss_value = losses_reduced.item()\n        # 记录训练损失\n        mloss = (mloss * i + loss_value) / (i + 1)  # update mean losses\n\n        if not math.isfinite(loss_value):  # 当计算的损失为无穷大时停止训练\n            print(\"Loss is {}, stopping training\".format(loss_value))\n            print(loss_dict_reduced)\n            sys.exit(1)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(losses).backward()\n            scaler.step(optimizer)\n         
   scaler.update()\n        else:\n            losses.backward()\n            optimizer.step()\n\n        if lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, device):\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    det_metric = EvalCOCOMetric(data_loader.dataset.coco, iou_type=\"bbox\", results_file_name=\"det_results.json\")\n    for image, targets in metric_logger.log_every(data_loader, 100, header):\n        image = list(img.to(device) for img in image)\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        outputs = model(image)\n\n        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n        model_time = time.time() - model_time\n\n        det_metric.update(targets, outputs)\n        metric_logger.update(model_time=model_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n\n    # 同步所有进程中的数据\n    det_metric.synchronize_results()\n\n    if utils.is_main_process():\n        coco_info = det_metric.evaluate()\n    else:\n        coco_info = None\n\n    return coco_info\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/transforms.py",
    "content": "import random\nfrom torchvision.transforms import functional as F\n\n\nclass Compose(object):\n    \"\"\"组合多个transform函数\"\"\"\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass ToTensor(object):\n    \"\"\"将PIL图像转为Tensor\"\"\"\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"随机水平翻转图像以及bboxes\"\"\"\n    def __init__(self, prob=0.5):\n        self.prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.prob:\n            height, width = image.shape[-2:]\n            image = image.flip(-1)  # 水平翻转图片\n            bbox = target[\"boxes\"]\n            # bbox: xmin, ymin, xmax, ymax\n            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]  # 翻转对应bbox坐标信息\n            target[\"boxes\"] = bbox\n        return image, target\n"
  },
  {
    "path": "pytorch_object_detection/train_coco_dataset/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\n\nimport os\nimport json\n\nimport torch\nimport torchvision\nfrom tqdm import tqdm\nimport numpy as np\nfrom torchvision.models.feature_extraction import create_feature_extractor\n\nimport transforms\nfrom network_files import FasterRCNN, AnchorsGenerator\nfrom my_dataset import CocoDetection\nfrom backbone import resnet50\nfrom train_utils import EvalCOCOMetric\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = 
s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            mean_s = -1\n        else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    data_transform = {\n        \"val\": transforms.Compose([transforms.ToTensor()])\n    }\n\n    # read class_indict\n    label_json_path = './coco91_indices.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        
category_index = json.load(f)\n\n    coco_root = parser_data.data_path\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = CocoDetection(coco_root, \"val\", data_transform[\"val\"])\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     pin_memory=True,\n                                                     num_workers=nw,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model\n    res50 = resnet50()\n    backbone = create_feature_extractor(res50, return_nodes={\"layer3\": \"0\"})\n    backbone.out_channels = 1024\n\n    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),\n                                        aspect_ratios=((0.5, 1.0, 2.0),))\n\n    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # 在哪些特征层上进行roi pooling\n                                                    output_size=[7, 7],  # roi_pooling输出特征矩阵尺寸\n                                                    sampling_ratio=2)  # 采样率\n\n    # num_classes equal 80 + background classes\n    model = FasterRCNN(backbone=backbone,\n                       num_classes=parser_data.num_classes + 1,\n                       rpn_anchor_generator=anchor_generator,\n                       box_roi_pool=roi_pooler)\n\n    # 载入你自己训练好的模型权重\n    weights_path = parser_data.weights_path\n    assert os.path.exists(weights_path), \"not found {} file.\".format(weights_path)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = 
weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    # print(model)\n\n    model.to(device)\n\n    # evaluate on the val dataset\n    cpu_device = torch.device(\"cpu\")\n\n    det_metric = EvalCOCOMetric(val_dataset.coco, \"bbox\", \"det_results.json\")\n    model.eval()\n    with torch.no_grad():\n        for image, targets in tqdm(val_dataset_loader, desc=\"validation...\"):\n            # 将图片传入指定设备device\n            image = list(img.to(device) for img in image)\n\n            # inference\n            outputs = model(image)\n            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]\n            det_metric.update(targets, outputs)\n\n    det_metric.synchronize_results()\n    det_metric.evaluate()\n\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(det_metric.coco_evaluator)\n\n    # calculate voc info for every classes(IoU=0.5)\n    voc_map_info_list = []\n    classes = [v for v in category_index.values() if v != \"N/A\"]\n    for i in range(len(classes)):\n        stats, _ = summarize(det_metric.coco_evaluator, catId=i)\n        voc_map_info_list.append(\" {:15}: {}\".format(classes[i], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(\"record_mAP.txt\", \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n\n    # 检测目标类别数\n    parser.add_argument('--num-classes', type=int, default=90, help='number of classes')\n\n    # 数据集的根目录(coco2017根目录)\n    
parser.add_argument('--data-path', default='/data/coco2017', help='dataset root')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights-path', default='./save_weights/model.pth', type=str, help='training weights')\n\n    # batch size\n    parser.add_argument('--batch_size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/README.md",
    "content": "# YOLOv3 SPP\n## 该项目源自[ultralytics/yolov3](https://github.com/ultralytics/yolov3)\n## 1 环境配置：\n* Python3.6或者3.7\n* Pytorch1.7.1(注意：必须是1.6.0或以上，因为使用官方提供的混合精度训练1.6.0后才支持)\n* pycocotools(Linux: `pip install pycocotools`;   \n  Windows: `pip install pycocotools-windows`(不需要额外安装vs))\n* 更多环境配置信息，请查看`requirements.txt`文件\n* 最好使用GPU训练\n\n## 2 文件结构：\n```\n  ├── cfg: 配置文件目录\n  │    ├── hyp.yaml: 训练网络的相关超参数\n  │    └── yolov3-spp.cfg: yolov3-spp网络结构配置 \n  │ \n  ├── data: 存储训练时数据集相关信息缓存\n  │    └── pascal_voc_classes.json: pascal voc数据集标签\n  │ \n  ├── runs: 保存训练过程中生成的所有tensorboard相关文件\n  ├── build_utils: 搭建训练网络时使用到的工具\n  │     ├── datasets.py: 数据读取以及预处理方法\n  │     ├── img_utils.py: 部分图像处理方法\n  │     ├── layers.py: 实现的一些基础层结构\n  │     ├── parse_config.py: 解析yolov3-spp.cfg文件\n  │     ├── torch_utils.py: 使用pytorch实现的一些工具\n  │     └── utils.py: 训练网络过程中使用到的一些方法\n  │\n  ├── train_utils: 训练验证网络时使用到的工具(包括多GPU训练以及使用cocotools)\n  ├── weights: 所有相关预训练权重(下面会给出百度云的下载地址)\n  ├── model.py: 模型搭建文件\n  ├── train.py: 针对单GPU或者CPU的用户使用\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── trans_voc2yolo.py: 将voc数据集标注信息(.xml)转为yolo标注格式(.txt)\n  ├── calculate_dataset.py: 1)统计训练集和验证集的数据并生成相应.txt文件\n  │                         2)创建data.data文件\n  │                         3)根据yolov3-spp.cfg结合数据集类别数创建my_yolov3.cfg文件\n  └── predict_test.py: 简易的预测脚本，使用训练好的权重进行预测测试\n```\n\n## 3 训练数据的准备以及目录结构\n* 这里建议标注数据时直接生成yolo格式的标签文件`.txt`，推荐使用免费开源的标注软件(支持yolo格式)，[https://github.com/tzutalin/labelImg](https://github.com/tzutalin/labelImg)\n* 如果之前已经标注成pascal voc的`.xml`格式了也没关系，我写了个voc转yolo格式的转化脚本，4.1会讲怎么使用\n* 测试图像时最好将图像缩放到32的倍数\n* 标注好的数据集请按照以下目录结构进行摆放:\n```\n├── my_yolo_dataset 自定义数据集根目录\n│         ├── train   训练集目录\n│         │     ├── images  训练集图像目录\n│         │     └── labels  训练集标签目录 \n│         └── val    验证集目录\n│               ├── images  验证集图像目录\n│               └── labels  验证集标签目录            \n```\n\n## 4 
利用标注好的数据集生成一系列相关准备文件，为了方便我写了个脚本，通过脚本可直接生成。也可参考原作者的[教程](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data)\n```\n├── data 利用数据集生成的一系列相关准备文件目录\n│    ├── my_train_data.txt:  该文件里存储的是所有训练图片的路径地址\n│    ├── my_val_data.txt:  该文件里存储的是所有验证图片的路径地址\n│    ├── my_data_label.names:  该文件里存储的是所有类别的名称，一个类别对应一行(这里会根据`.json`文件自动生成)\n│    └── my_data.data:  该文件里记录的是类别数类别信息、train以及valid对应的txt文件\n```\n\n### 4.1 将VOC标注数据转为YOLO标注数据(如果你的数据已经是YOLO格式了，可跳过该步骤)\n* 使用`trans_voc2yolo.py`脚本进行转换，并在`./data/`文件夹下生成`my_data_label.names`标签文件，\n* 执行脚本前，需要根据自己的路径修改以下参数\n```python\n# voc数据集根目录以及版本\nvoc_root = \"./VOCdevkit\"\nvoc_version = \"VOC2012\"\n\n# 转换的训练集以及验证集对应txt文件，对应VOCdevkit/VOC2012/ImageSets/Main文件夹下的txt文件\ntrain_txt = \"train.txt\"\nval_txt = \"val.txt\"\n\n# 转换后的文件保存目录\nsave_file_root = \"/home/wz/my_project/my_yolo_dataset\"\n\n# label标签对应json文件\nlabel_json_path = './data/pascal_voc_classes.json'\n```\n* 生成的`my_data_label.names`标签文件格式如下\n```text\naeroplane\nbicycle\nbird\nboat\nbottle\nbus\n...\n```\n\n### 4.2 根据摆放好的数据集信息生成一系列相关准备文件\n* 使用`calculate_dataset.py`脚本生成`my_train_data.txt`文件、`my_val_data.txt`文件以及`my_data.data`文件，并生成新的`my_yolov3.cfg`文件\n* 执行脚本前，需要根据自己的路径修改以下参数\n```python\n# 训练集的labels目录路径\ntrain_annotation_dir = \"/home/wz/my_project/my_yolo_dataset/train/labels\"\n# 验证集的labels目录路径\nval_annotation_dir = \"/home/wz/my_project/my_yolo_dataset/val/labels\"\n# 上一步生成的my_data_label.names文件路径(如果没有该文件，可以自己手动编辑一个txt文档，然后重命名为.names格式即可)\nclasses_label = \"./data/my_data_label.names\"\n# 原始yolov3-spp.cfg网络结构配置文件\ncfg_path = \"./cfg/yolov3-spp.cfg\"\n```\n\n## 5 预训练权重下载地址（下载后放入weights文件夹中）：\n* `yolov3-spp-ultralytics-416.pt`: 链接: https://pan.baidu.com/s/1cK3USHKxDx-d5dONij52lA  密码: r3vm\n* `yolov3-spp-ultralytics-512.pt`: 链接: https://pan.baidu.com/s/1k5yeTZZNv8Xqf0uBXnUK-g  密码: e3k1\n* `yolov3-spp-ultralytics-608.pt`: 链接: https://pan.baidu.com/s/1GI8BA0wxeWMC0cjrC01G7Q  密码: ma3t\n* `yolov3spp-voc-512.pt` **(这是我在视频演示训练中得到的权重)**: 链接: https://pan.baidu.com/s/1aFAtaHlge0ieFtQ9nhmj3w  
密码: 8ph3\n \n \n## 6 数据集，本例程使用的是PASCAL VOC2012数据集\n* `Pascal VOC2012` train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的bilibili：https://b23.tv/F1kSCK\n\n## 7 使用方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要使用单GPU训练或者使用CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 训练过程中保存的`results.txt`是每个epoch在验证集上的COCO指标，前12个值是COCO指标，后面两个值是训练平均损失以及学习率\n\n## 如果对YOLOv3 SPP网络原理不是很理解可参考我的bilibili\n[https://www.bilibili.com/video/BV1yi4y1g7ro?p=3](https://www.bilibili.com/video/BV1yi4y1g7ro?p=3)\n\n## 进一步了解该项目，以及对YOLOv3 SPP代码的分析可参考我的bilibili\n[https://www.bilibili.com/video/BV1t54y1C7ra](https://www.bilibili.com/video/BV1t54y1C7ra)\n\n## YOLOv3 SPP框架图\n![yolov3spp](yolov3spp.png) \n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/__init__.py",
    "content": ""
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/datasets.py",
    "content": "import math\nimport os\nimport random\nimport shutil\nfrom pathlib import Path\n\nimport cv2\nimport numpy as np\nimport torch\nfrom PIL import Image, ExifTags\nfrom torch.utils.data import Dataset\nfrom tqdm import tqdm\n\nfrom build_utils.utils import xyxy2xywh, xywh2xyxy\n\nhelp_url = 'https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data'\nimg_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']\n\n\n# get orientation in exif tag\n# 找到图像exif信息中对应旋转信息的key值\nfor orientation in ExifTags.TAGS.keys():\n    if ExifTags.TAGS[orientation] == \"Orientation\":\n        break\n\n\ndef exif_size(img):\n    \"\"\"\n    获取图像的原始img size\n    通过exif的orientation信息判断图像是否有旋转，如果有旋转则返回旋转前的size\n    :param img: PIL图片\n    :return: 原始图像的size\n    \"\"\"\n    # Returns exif-corrected PIL size\n    s = img.size  # (width, height)\n    try:\n        rotation = dict(img._getexif().items())[orientation]\n        if rotation == 6:  # rotation 270  顺时针翻转90度\n            s = (s[1], s[0])\n        elif rotation == 8:  # ratation 90  逆时针翻转90度\n            s = (s[1], s[0])\n    except:\n        # 如果图像的exif信息中没有旋转信息，则跳过\n        pass\n\n    return s\n\n\nclass LoadImagesAndLabels(Dataset):  # for training/testing\n    def __init__(self,\n                 path,   # 指向data/my_train_data.txt路径或data/my_val_data.txt路径\n                 # 这里设置的是预处理后输出的图片尺寸\n                 # 当为训练集时，设置的是训练过程中(开启多尺度)的最大尺寸\n                 # 当为验证集时，设置的是最终使用的网络大小\n                 img_size=416,\n                 batch_size=16,\n                 augment=False,  # 训练集设置为True(augment_hsv)，验证集设置为False\n                 hyp=None,  # 超参数字典，其中包含图像增强会使用到的超参数\n                 rect=False,  # 是否使用rectangular training\n                 cache_images=False,  # 是否缓存图片到内存中\n                 single_cls=False, pad=0.0, rank=-1):\n\n        try:\n            path = str(Path(path))\n            # parent = str(Path(path).parent) + os.sep\n            if os.path.isfile(path):  # file\n                # 
读取对应my_train/val_data.txt文件，读取每一行的图片路径信息\n                with open(path, \"r\") as f:\n                    f = f.read().splitlines()\n            else:\n                raise Exception(\"%s does not exist\" % path)\n\n            # 检查每张图片后缀格式是否在支持的列表中，保存支持的图像路径\n            # img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']\n            self.img_files = [x for x in f if os.path.splitext(x)[-1].lower() in img_formats]\n            self.img_files.sort()  # 防止不同系统排序不同，导致shape文件出现差异\n        except Exception as e:\n            raise FileNotFoundError(\"Error loading data from {}. {}\".format(path, e))\n\n        # 如果图片列表中没有图片，则报错\n        n = len(self.img_files)\n        assert n > 0, \"No images found in %s. See %s\" % (path, help_url)\n\n        # batch index\n        # 将数据划分到一个个batch中\n        bi = np.floor(np.arange(n) / batch_size).astype(np.int)\n        # 记录数据集划分后的总batch数\n        nb = bi[-1] + 1  # number of batches\n\n        self.n = n  # number of images 图像总数目\n        self.batch = bi  # batch index of image 记录哪些图片属于哪个batch\n        self.img_size = img_size  # 这里设置的是预处理后输出的图片尺寸\n        self.augment = augment  # 是否启用augment_hsv\n        self.hyp = hyp  # 超参数字典，其中包含图像增强会使用到的超参数\n        self.rect = rect  # 是否使用rectangular training\n        # 注意: 开启rect后，mosaic就默认关闭\n        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)\n\n        # Define labels\n        # 遍历设置图像对应的label路径\n        # (./my_yolo_dataset/train/images/2009_004012.jpg) -> (./my_yolo_dataset/train/labels/2009_004012.txt)\n        self.label_files = [x.replace(\"images\", \"labels\").replace(os.path.splitext(x)[-1], \".txt\")\n                            for x in self.img_files]\n\n        # Read image shapes (wh)\n        # 查看data文件下是否缓存有对应数据集的.shapes文件，里面存储了每张图像的width, height\n        sp = path.replace(\".txt\", \".shapes\")  # shapefile path\n        try:\n            with open(sp, \"r\") as f:  # read existing 
shapefile\n                s = [x.split() for x in f.read().splitlines()]\n                # 判断现有的shape文件中的行数(图像个数)是否与当前数据集中图像个数相等\n                # 如果不相等则认为是不同的数据集，故重新生成shape文件\n                assert len(s) == n, \"shapefile out of aync\"\n        except Exception as e:\n            # print(\"read {} failed [{}], rebuild {}.\".format(sp, e, sp))\n            # tqdm库会显示处理的进度\n            # 读取每张图片的size信息\n            if rank in [-1, 0]:\n                image_files = tqdm(self.img_files, desc=\"Reading image shapes\")\n            else:\n                image_files = self.img_files\n            s = [exif_size(Image.open(f)) for f in image_files]\n            # 将所有图片的shape信息保存在.shape文件中\n            np.savetxt(sp, s, fmt=\"%g\")  # overwrite existing (if any)\n\n        # 记录每张图像的原始尺寸\n        self.shapes = np.array(s, dtype=np.float64)\n\n        # Rectangular Training https://github.com/ultralytics/yolov3/issues/232\n        # 如果为ture，训练网络时，会使用类似原图像比例的矩形(让最长边为img_size)，而不是img_size x img_size\n        # 注意: 开启rect后，mosaic就默认关闭\n        if self.rect:\n            # Sort by aspect ratio\n            s = self.shapes  # wh\n            # 计算每个图片的高/宽比\n            ar = s[:, 1] / s[:, 0]  # aspect ratio\n            # argsort函数返回的是数组值从小到大的索引值\n            # 按照高宽比例进行排序，这样后面划分的每个batch中的图像就拥有类似的高宽比\n            irect = ar.argsort()\n            # 根据排序后的顺序重新设置图像顺序、标签顺序以及shape顺序\n            self.img_files = [self.img_files[i] for i in irect]\n            self.label_files = [self.label_files[i] for i in irect]\n            self.shapes = s[irect]  # wh\n            ar = ar[irect]\n\n            # set training image shapes\n            # 计算每个batch采用的统一尺度\n            shapes = [[1, 1]] * nb  # nb: number of batches\n            for i in range(nb):\n                ari = ar[bi == i]  # bi: batch index\n                # 获取第i个batch中，最小和最大高宽比\n                mini, maxi = ari.min(), ari.max()\n\n                # 如果高/宽小于1(w > h)，将w设为img_size\n                if maxi < 1:\n           
         shapes[i] = [maxi, 1]\n                # 如果高/宽大于1(w < h)，将h设置为img_size\n                elif mini > 1:\n                    shapes[i] = [1, 1 / mini]\n            # 计算每个batch输入网络的shape值(向上设置为32的整数倍)\n            self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32\n\n        # cache labels\n        self.imgs = [None] * n  # n为图像总数\n        # label: [class, x, y, w, h] 其中的xywh都为相对值\n        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n\n        extract_bounding_boxes, labels_loaded = False, False\n        nm, nf, ne, nd = 0, 0, 0, 0  # number missing, found, empty, duplicate\n        # 这里分别命名是为了防止出现rect为False/True时混用导致计算的mAP错误\n        # 当rect为True时会对self.img_files和self.labels进行重新排序\n        if rect is True:\n            np_labels_path = str(Path(self.label_files[0]).parent) + \".rect.npy\"  # saved labels in *.npy file\n        else:\n            np_labels_path = str(Path(self.label_files[0]).parent) + \".norect.npy\"\n\n        if os.path.isfile(np_labels_path):\n            x = np.load(np_labels_path, allow_pickle=True)\n            if len(x) == n:\n                # 如果载入的缓存标签个数与当前计算的图像数目相同则认为是同一数据集，直接读缓存\n                self.labels = x\n                labels_loaded = True\n\n        # 处理进度条只在第一个进程中显示\n        if rank in [-1, 0]:\n            pbar = tqdm(self.label_files)\n        else:\n            pbar = self.label_files\n\n        # 遍历载入标签文件\n        for i, file in enumerate(pbar):\n            if labels_loaded is True:\n                # 如果存在缓存直接从缓存读取\n                l = self.labels[i]\n            else:\n                # 从文件读取标签信息\n                try:\n                    with open(file, \"r\") as f:\n                        # 读取每一行label，并按空格划分数据\n                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)\n                except Exception as e:\n                    print(\"An error occurred while loading the file {}: {}\".format(file, e))\n                    nm 
+= 1  # file missing\n                    continue\n\n            # 如果标注信息不为空的话\n            if l.shape[0]:\n                # 标签信息每行必须是五个值[class, x, y, w, h]\n                assert l.shape[1] == 5, \"> 5 label columns: %s\" % file\n                assert (l >= 0).all(), \"negative labels: %s\" % file\n                assert (l[:, 1:] <= 1).all(), \"non-normalized or out of bounds coordinate labels: %s\" % file\n\n                # 检查每一行，看是否有重复信息\n                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows\n                    nd += 1\n                if single_cls:\n                    l[:, 0] = 0  # force dataset into single-class mode\n\n                self.labels[i] = l\n                nf += 1  # file found\n\n                # Extract object detection boxes for a second stage classifier\n                if extract_bounding_boxes:\n                    p = Path(self.img_files[i])\n                    img = cv2.imread(str(p))\n                    h, w = img.shape[:2]\n                    for j, x in enumerate(l):\n                        f = \"%s%sclassifier%s%g_%g_%s\" % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)\n                        if not os.path.exists(Path(f).parent):\n                            os.makedirs(Path(f).parent)  # make new output folder\n\n                        # 将相对坐标转为绝对坐标\n                        # b: x, y, w, h\n                        b = x[1:] * [w, h, w, h]  # box\n                        # 将宽和高设置为宽和高中的最大值\n                        b[2:] = b[2:].max()  # rectangle to square\n                        # 放大裁剪目标的宽高\n                        b[2:] = b[2:] * 1.3 + 30  # pad\n                        # 将坐标格式从 x,y,w,h -> xmin,ymin,xmax,ymax\n                        b = xywh2xyxy(b.reshape(-1, 4)).revel().astype(np.int)\n\n                        # 裁剪bbox坐标到图片内\n                        b[[0, 2]] = np.clip[b[[0, 2]], 0, w]\n                        b[[1, 3]] = np.clip[b[[1, 3]], 0, h]\n                        assert 
cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), \"Failure extracting classifier boxes\"\n            else:\n                ne += 1  # file empty\n\n            # 处理进度条只在第一个进程中显示\n            if rank in [-1, 0]:\n                # 更新进度条描述信息\n                pbar.desc = \"Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)\" % (\n                    nf, nm, ne, nd, n)\n        assert nf > 0, \"No labels found in %s.\" % os.path.dirname(self.label_files[0]) + os.sep\n\n        # 如果标签信息没有被保存成numpy的格式，且训练样本数大于1000则将标签信息保存成numpy的格式\n        if not labels_loaded and n > 1000:\n            print(\"Saving labels to %s for faster future loading\" % np_labels_path)\n            np.save(np_labels_path, self.labels)  # save for next time\n\n        # Cache images into memory for faster training (Warning: large datasets may exceed system RAM)\n        if cache_images:  # if training\n            gb = 0  # Gigabytes of cached images 用于记录缓存图像占用RAM大小\n            if rank in [-1, 0]:\n                pbar = tqdm(range(len(self.img_files)), desc=\"Caching images\")\n            else:\n                pbar = range(len(self.img_files))\n\n            self.img_hw0, self.img_hw = [None] * n, [None] * n\n            for i in pbar:  # max 10k images\n                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized\n                gb += self.imgs[i].nbytes  # 用于记录缓存图像占用RAM大小\n                if rank in [-1, 0]:\n                    pbar.desc = \"Caching images (%.1fGB)\" % (gb / 1E9)\n\n        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3\n        detect_corrupted_images = False\n        if detect_corrupted_images:\n            from skimage import io  # conda install -c conda-forge scikit-image\n            for file in tqdm(self.img_files, desc=\"Detecting corrupted images\"):\n                try:\n                    _ = io.imread(file)\n                
except Exception as e:\n                    print(\"Corrupted image detected: {}, {}\".format(file, e))\n\n    def __len__(self):\n        return len(self.img_files)\n\n    def __getitem__(self, index):\n        hyp = self.hyp\n        if self.mosaic:\n            # load mosaic\n            img, labels = load_mosaic(self, index)\n            shapes = None\n        else:\n            # load image\n            img, (h0, w0), (h, w) = load_image(self, index)\n\n            # letterbox\n            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape\n            img, ratio, pad = letterbox(img, shape, auto=False, scale_up=self.augment)\n            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling\n\n            # load labels\n            labels = []\n            x = self.labels[index]\n            if x.size > 0:\n                # Normalized xywh to pixel xyxy format\n                labels = x.copy()  # label: class, x, y, w, h\n                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width\n                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height\n                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]\n                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]\n\n        if self.augment:\n            # Augment imagespace\n            if not self.mosaic:\n                img, labels = random_affine(img, labels,\n                                            degrees=hyp[\"degrees\"],\n                                            translate=hyp[\"translate\"],\n                                            scale=hyp[\"scale\"],\n                                            shear=hyp[\"shear\"])\n\n            # Augment colorspace\n            augment_hsv(img, h_gain=hyp[\"hsv_h\"], s_gain=hyp[\"hsv_s\"], v_gain=hyp[\"hsv_v\"])\n\n        nL = len(labels)  # number of labels\n        if nL:\n         
   # convert xyxy to xywh\n            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])\n\n            # Normalize coordinates 0-1\n            labels[:, [2, 4]] /= img.shape[0]  # height\n            labels[:, [1, 3]] /= img.shape[1]  # width\n\n        if self.augment:\n            # random left-right flip\n            lr_flip = True  # 随机水平翻转\n            if lr_flip and random.random() < 0.5:\n                img = np.fliplr(img)\n                if nL:\n                    labels[:, 1] = 1 - labels[:, 1]  # 1 - x_center\n\n            # random up-down flip\n            ud_flip = False\n            if ud_flip and random.random() < 0.5:\n                img = np.flipud(img)\n                if nL:\n                    labels[:, 2] = 1 - labels[:, 2]  # 1 - y_center\n\n        labels_out = torch.zeros((nL, 6))  # nL: number of labels\n        if nL:\n            labels_out[:, 1:] = torch.from_numpy(labels)\n\n        # Convert BGR to RGB, and HWC to CHW(3x512x512)\n        img = img[:, :, ::-1].transpose(2, 0, 1)\n        img = np.ascontiguousarray(img)\n\n        return torch.from_numpy(img), labels_out, self.img_files[index], shapes, index\n\n    def coco_index(self, index):\n        \"\"\"该方法是专门为cocotools统计标签信息准备，不对图像和标签作任何处理\"\"\"\n        o_shapes = self.shapes[index][::-1]  # wh to hw\n\n        # load labels\n        x = self.labels[index]\n        labels = x.copy()  # label: class, x, y, w, h\n        return torch.from_numpy(labels), o_shapes\n\n    @staticmethod\n    def collate_fn(batch):\n        img, label, path, shapes, index = zip(*batch)  # transposed\n        for i, l in enumerate(label):\n            l[:, 0] = i  # add target image index for build_targets()\n        return torch.stack(img, 0), torch.cat(label, 0), path, shapes, index\n\n\ndef load_image(self, index):\n    # loads 1 image from dataset, returns img, original hw, resized hw\n    img = self.imgs[index]\n    if img is None:  # not cached\n        path = self.img_files[index]\n        img = 
cv2.imread(path)  # BGR\n        assert img is not None, \"Image Not Found \" + path\n        h0, w0 = img.shape[:2]  # orig hw\n        # img_size 设置的是预处理后输出的图片尺寸\n        r = self.img_size / max(h0, w0)  # resize image to img_size\n        if r != 1:  # if sizes are not equal\n            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR\n            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)\n        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized\n    else:\n        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized\n\n\ndef load_mosaic(self, index):\n    \"\"\"\n    将四张图片拼接在一张马赛克图像中\n    :param self:\n    :param index: 需要获取的图像索引\n    :return:\n    \"\"\"\n    # loads images in a mosaic\n\n    labels4 = []  # 拼接图像的label信息\n    s = self.img_size\n    # 随机初始化拼接图像的中心点坐标\n    xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y\n    # 从dataset中随机寻找三张图像进行拼接\n    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices\n    # 遍历四张图像进行拼接\n    for i, index in enumerate(indices):\n        # load image\n        img, _, (h, w) = load_image(self, index)\n\n        # place img in img4\n        if i == 0:  # top left\n            # 创建马赛克图像\n            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles\n            # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中)\n            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)\n            # 计算截取的图像区域信息(以xc,yc为第一张图像的右下角坐标填充到马赛克图像中，丢弃越界的区域)\n            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)\n        elif i == 1:  # top right\n            # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中)\n            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc\n            # 
计算截取的图像区域信息(以xc,yc为第二张图像的左下角坐标填充到马赛克图像中，丢弃越界的区域)\n            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h\n        elif i == 2:  # bottom left\n            # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中)\n            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)\n            # 计算截取的图像区域信息(以xc,yc为第三张图像的右上角坐标填充到马赛克图像中，丢弃越界的区域)\n            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)\n        elif i == 3:  # bottom right\n            # 计算马赛克图像中的坐标信息(将图像填充到马赛克图像中)\n            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)\n            # 计算截取的图像区域信息(以xc,yc为第四张图像的左上角坐标填充到马赛克图像中，丢弃越界的区域)\n            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)\n\n        # 将截取的图像区域填充到马赛克图像的相应位置\n        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]\n        # 计算pad(图像边界与马赛克边界的距离，越界的情况为负值)\n        padw = x1a - x1b\n        padh = y1a - y1b\n\n        # Labels 获取对应拼接图像的labels信息\n        # [class_index, x_center, y_center, w, h]\n        x = self.labels[index]\n        labels = x.copy()  # 深拷贝，防止修改原数据\n        if x.size > 0:  # Normalized xywh to pixel xyxy format\n            # 计算标注数据在马赛克图像中的坐标(绝对坐标)\n            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw   # xmin\n            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh   # ymin\n            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw   # xmax\n            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh   # ymax\n        labels4.append(labels)\n\n    # Concat/clip labels\n    if len(labels4):\n        labels4 = np.concatenate(labels4, 0)\n        # 设置上下限防止越界\n        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine\n\n    # Augment\n    # 随机旋转，缩放，平移以及错切\n    img4, labels4 = random_affine(img4, labels4,\n                                  degrees=self.hyp['degrees'],\n                                  translate=self.hyp['translate'],\n                                  
scale=self.hyp['scale'],\n                                  shear=self.hyp['shear'],\n                                  border=-s // 2)  # border to remove\n\n    return img4, labels4\n\n\ndef random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):\n    \"\"\"随机旋转，缩放，平移以及错切\"\"\"\n    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))\n    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4\n    # 这里可以参考我写的博文: https://blog.csdn.net/qq_37541097/article/details/119420860\n    # targets = [cls, xyxy]\n\n    # 最终输出的图像尺寸，等于img4.shape / 2\n    height = img.shape[0] + border * 2\n    width = img.shape[1] + border * 2\n\n    # Rotation and Scale\n    # 生成旋转以及缩放矩阵\n    R = np.eye(3)  # 生成对角阵\n    a = random.uniform(-degrees, degrees)  # 随机旋转角度\n    s = random.uniform(1 - scale, 1 + scale)  # 随机缩放因子\n    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)\n\n    # Translation\n    # 生成平移矩阵\n    T = np.eye(3)\n    T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # x translation (pixels)\n    T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # y translation (pixels)\n\n    # Shear\n    # 生成错切矩阵\n    S = np.eye(3)\n    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)\n    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)\n\n    # Combined rotation matrix\n    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!\n    if (border != 0) or (M != np.eye(3)).any():  # image changed\n        # 进行仿射变化\n        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))\n\n    # Transform label coordinates\n    n = len(targets)\n    if n:\n        # warp points\n        xy = np.ones((n * 4, 3))\n        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 
2)  # x1y1, x2y2, x1y2, x2y1\n        # [4*n, 3] -> [n, 8]\n        xy = (xy @ M.T)[:, :2].reshape(n, 8)\n\n        # create new boxes\n        # 对transform后的bbox进行修正(假设变换后的bbox变成了菱形，此时要修正成矩形)\n        x = xy[:, [0, 2, 4, 6]]  # [n, 4]\n        y = xy[:, [1, 3, 5, 7]]  # [n, 4]\n        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T  # [n, 4]\n\n        # reject warped points outside of image\n        # 对坐标进行裁剪，防止越界\n        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)\n        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)\n        w = xy[:, 2] - xy[:, 0]\n        h = xy[:, 3] - xy[:, 1]\n\n        # 计算调整后的每个box的面积\n        area = w * h\n        # 计算调整前的每个box的面积\n        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])\n        # 计算每个box的比例\n        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio\n        # 选取长宽大于4个像素，且调整前后面积比例大于0.2，且比例小于10的box\n        i = (w > 4) & (h > 4) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 10)\n\n        targets = targets[i]\n        targets[:, 1:5] = xy[i]\n\n    return img, targets\n\n\ndef augment_hsv(img, h_gain=0.5, s_gain=0.5, v_gain=0.5):\n    # 这里可以参考我写的博文:https://blog.csdn.net/qq_37541097/article/details/119478023\n    r = np.random.uniform(-1, 1, 3) * [h_gain, s_gain, v_gain] + 1  # random gains\n    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))\n    dtype = img.dtype  # uint8\n\n    x = np.arange(0, 256, dtype=np.int16)\n    lut_hue = ((x * r[0]) % 180).astype(dtype)\n    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)\n    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)\n\n    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)\n    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed\n\n\ndef letterbox(img: np.ndarray,\n              new_shape=(416, 416),\n              color=(114, 114, 114),\n              auto=True,\n              scale_fill=False,\n         
     scale_up=True):\n    \"\"\"\n    将图片缩放调整到指定大小\n    :param img:\n    :param new_shape:\n    :param color:\n    :param auto:\n    :param scale_fill:\n    :param scale_up:\n    :return:\n    \"\"\"\n\n    shape = img.shape[:2]  # [h, w]\n    if isinstance(new_shape, int):\n        new_shape = (new_shape, new_shape)\n\n    # scale ratio (new / old)\n    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n    if not scale_up:  # only scale down, do not scale up (for better test mAP) 对于大于指定输入大小的图片进行缩放,小于的不变\n        r = min(r, 1.0)\n\n    # compute padding\n    ratio = r, r  # width, height ratios\n    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n    if auto:  # minimum rectangle 保证原图比例不变，将图像最大边缩放到指定大小\n        # 这里的取余操作可以保证padding后的图片是32的整数倍\n        dw, dh = np.mod(dw, 32), np.mod(dh, 32)  # wh padding\n    elif scale_fill:  # stretch 简单粗暴的将图片缩放到指定尺寸\n        dw, dh = 0, 0\n        new_unpad = new_shape\n        ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # wh ratios\n\n    dw /= 2  # divide padding into 2 sides 将padding分到上下，左右两侧\n    dh /= 2\n\n    # shape:[h, w]  new_unpad:[w, h]\n    if shape[::-1] != new_unpad:\n        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)\n    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))  # 计算上下两侧的padding\n    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # 计算左右两侧的padding\n\n    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n    return img, ratio, (dw, dh)\n\n\ndef create_folder(path=\"./new_folder\"):\n    # Create folder\n    if os.path.exists(path):\n        shutil.rmtree(path)  # delete output folder\n    os.makedirs(path)  # make new output folder\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/img_utils.py",
    "content": "import numpy as np\nimport cv2\n\n\ndef letterbox(img: np.ndarray,\n              new_shape=(416, 416),\n              color=(114, 114, 114),\n              auto=True,\n              scale_fill=False,\n              scale_up=True):\n    \"\"\"\n    将图片缩放调整到指定大小\n    :param img: 输入的图像numpy格式\n    :param new_shape: 输入网络的shape\n    :param color: padding用什么颜色填充\n    :param auto:\n    :param scale_fill: 简单粗暴缩放到指定大小\n    :param scale_up:  只缩小，不放大\n    :return:\n    \"\"\"\n\n    shape = img.shape[:2]  # [h, w]\n    if isinstance(new_shape, int):\n        new_shape = (new_shape, new_shape)\n\n    # scale ratio (new / old)\n    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n    if not scale_up:  # only scale down, do not scale up (for better test mAP) 对于大于指定输入大小的图片进行缩放,小于的不变\n        r = min(r, 1.0)\n\n    # compute padding\n    ratio = r, r  # width, height ratios\n    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n    if auto:  # minimun rectangle 保证原图比例不变，将图像最大边缩放到指定大小\n        # 这里的取余操作可以保证padding后的图片是32的整数倍(416x416)，如果是(512x512)可以保证是64的整数倍\n        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding\n    elif scale_fill:  # stretch 简单粗暴的将图片缩放到指定尺寸\n        dw, dh = 0, 0\n        new_unpad = new_shape[::-1]  # [h, w] -> [w, h]\n        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # wh ratios\n\n    dw /= 2  # divide padding into 2 sides 将padding分到上下，左右两侧\n    dh /= 2\n\n    # shape:[h, w]  new_unpad:[w, h]\n    if shape[::-1] != new_unpad:\n        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)\n    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))  # 计算上下两侧的padding\n    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # 计算左右两侧的padding\n\n    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n    return img, ratio, (dw, 
dh)\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/layers.py",
    "content": "import torch.nn.functional as F\nfrom .utils import *\n\n\ndef make_divisible(v, divisor):\n    # Function ensures all layers have a channel number that is divisible by 8\n    # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    return math.ceil(v / divisor) * divisor\n\n\nclass Flatten(nn.Module):\n    # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions\n    def forward(self, x):\n        return x.view(x.size(0), -1)\n\n\nclass Concat(nn.Module):\n    # Concatenate a list of tensors along dimension\n    def __init__(self, dimension=1):\n        super(Concat, self).__init__()\n        self.d = dimension\n\n    def forward(self, x):\n        return torch.cat(x, self.d)\n\n\nclass FeatureConcat(nn.Module):\n    \"\"\"\n    将多个特征矩阵在channel维度进行concatenate拼接\n    \"\"\"\n    def __init__(self, layers):\n        super(FeatureConcat, self).__init__()\n        self.layers = layers  # layer indices\n        self.multiple = len(layers) > 1  # multiple layers flag\n\n    def forward(self, x, outputs):\n        return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]]\n\n\nclass WeightedFeatureFusion(nn.Module):  # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070\n    \"\"\"\n    将多个特征矩阵的值进行融合(add操作)\n    \"\"\"\n    def __init__(self, layers, weight=False):\n        super(WeightedFeatureFusion, self).__init__()\n        self.layers = layers  # layer indices\n        self.weight = weight  # apply weights boolean\n        self.n = len(layers) + 1  # number of layers 融合的特征矩阵个数\n        if weight:\n            self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True)  # layer weights\n\n    def forward(self, x, outputs):\n        # Weights\n        if self.weight:\n            w = torch.sigmoid(self.w) * (2 / self.n)  # sigmoid weights (0-1)\n            x = x * w[0]\n\n        # Fusion\n        nx = x.shape[1]  # input channels\n        for i 
in range(self.n - 1):\n            a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]]  # feature to add\n            na = a.shape[1]  # feature channels\n\n            # Adjust channels\n            # 根据相加的两个特征矩阵的channel选择相加方式\n            if nx == na:  # same shape 如果channel相同，直接相加\n                x = x + a\n            elif nx > na:  # slice input 如果channel不同，将channel多的特征矩阵砍掉部分channel保证相加的channel一致\n                x[:, :na] = x[:, :na] + a  # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a\n            else:  # slice feature\n                x = x + a[:, :nx]\n\n        return x\n\n\nclass MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595\n    def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):\n        super(MixConv2d, self).__init__()\n\n        groups = len(k)\n        if method == 'equal_ch':  # equal channels per group\n            i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices\n            ch = [(i == g).sum() for g in range(groups)]\n        else:  # 'equal_params': equal parameter count per group\n            b = [out_ch] + [0] * groups\n            a = np.eye(groups + 1, groups, k=-1)\n            a -= np.roll(a, 1, axis=1)\n            a *= np.array(k) ** 2\n            a[0] = 1\n            ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve for equal weight indices, ax = b\n\n        self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,\n                                          out_channels=ch[g],\n                                          kernel_size=k[g],\n                                          stride=stride,\n                                          padding=k[g] // 2,  # 'same' pad\n                                          dilation=dilation,\n                                          bias=bias) for g in range(groups)])\n\n    def forward(self, x):\n        
return torch.cat([m(x) for m in self.m], 1)\n\n\n# Activation functions below -------------------------------------------------------------------------------------------\nclass SwishImplementation(torch.autograd.Function):\n    @staticmethod\n    def forward(ctx, x):\n        ctx.save_for_backward(x)\n        return x * torch.sigmoid(x)\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        x = ctx.saved_tensors[0]\n        sx = torch.sigmoid(x)  # sigmoid(ctx)\n        return grad_output * (sx * (1 + x * (1 - sx)))\n\n\nclass MishImplementation(torch.autograd.Function):\n    @staticmethod\n    def forward(ctx, x):\n        ctx.save_for_backward(x)\n        return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        x = ctx.saved_tensors[0]\n        sx = torch.sigmoid(x)\n        fx = F.softplus(x).tanh()\n        return grad_output * (fx + x * sx * (1 - fx * fx))\n\n\nclass MemoryEfficientSwish(nn.Module):\n    def forward(self, x):\n        return SwishImplementation.apply(x)\n\n\nclass MemoryEfficientMish(nn.Module):\n    def forward(self, x):\n        return MishImplementation.apply(x)\n\n\nclass Swish(nn.Module):\n    def forward(self, x):\n        return x * torch.sigmoid(x)\n\n\nclass HardSwish(nn.Module):  # https://arxiv.org/pdf/1905.02244.pdf\n    def forward(self, x):\n        return x * F.hardtanh(x + 3, 0., 6., True) / 6.\n\n\nclass Mish(nn.Module):  # https://github.com/digantamisra98/Mish\n    def forward(self, x):\n        return x * F.softplus(x).tanh()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/parse_config.py",
    "content": "import os\nimport numpy as np\n\n\ndef parse_model_cfg(path: str):\n    # 检查文件是否存在\n    if not path.endswith(\".cfg\") or not os.path.exists(path):\n        raise FileNotFoundError(\"the cfg file not exist...\")\n\n    # 读取文件信息\n    with open(path, \"r\") as f:\n        lines = f.read().split(\"\\n\")\n\n    # 去除空行和注释行\n    lines = [x for x in lines if x and not x.startswith(\"#\")]\n    # 去除每行开头和结尾的空格符\n    lines = [x.strip() for x in lines]\n\n    mdefs = []  # module definitions\n    for line in lines:\n        if line.startswith(\"[\"):  # this marks the start of a new block\n            mdefs.append({})\n            mdefs[-1][\"type\"] = line[1:-1].strip()  # 记录module类型\n            # 如果是卷积模块，设置默认不使用BN(普通卷积层后面会重写成1，最后的预测层conv保持为0)\n            if mdefs[-1][\"type\"] == \"convolutional\":\n                mdefs[-1][\"batch_normalize\"] = 0\n        else:\n            key, val = line.split(\"=\")\n            key = key.strip()\n            val = val.strip()\n\n            if key == \"anchors\":\n                # anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326\n                val = val.replace(\" \", \"\")  # 将空格去除\n                mdefs[-1][key] = np.array([float(x) for x in val.split(\",\")]).reshape((-1, 2))  # np anchors\n            elif (key in [\"from\", \"layers\", \"mask\"]) or (key == \"size\" and \",\" in val):\n                mdefs[-1][key] = [int(x) for x in val.split(\",\")]\n            else:\n                # TODO: .isnumeric() actually fails to get the float case\n                if val.isnumeric():  # return int or float 如果是数值的情况\n                    mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val)\n                else:\n                    mdefs[-1][key] = val  # return string  是字符的情况\n\n    # check all fields are supported\n    supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',\n                 'from', 
'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',\n                 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',\n                 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'probability']\n\n    # 遍历检查每个模型的配置\n    for x in mdefs[1:]:  # 0对应net配置\n        # 遍历每个配置字典中的key值\n        for k in x:\n            if k not in supported:\n                raise ValueError(\"Unsupported fields:{} in cfg\".format(k))\n\n    return mdefs\n\n\ndef parse_data_cfg(path):\n    # Parses the data configuration file\n    if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted\n        path = 'data' + os.sep + path\n\n    with open(path, 'r') as f:\n        lines = f.readlines()\n\n    options = dict()\n    for line in lines:\n        line = line.strip()\n        if line == '' or line.startswith('#'):\n            continue\n        key, val = line.split('=')\n        options[key.strip()] = val.strip()\n\n    return options\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/torch_utils.py",
    "content": "import math\nimport time\nfrom copy import deepcopy\n\nimport torch\nimport torch.backends.cudnn as cudnn\nimport torch.nn as nn\n\n\ndef init_seeds(seed=0):\n    torch.manual_seed(seed)\n\n    # Reduce randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html\n    if seed == 0:\n        cudnn.deterministic = False\n        cudnn.benchmark = True\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef initialize_weights(model):\n    for m in model.modules():\n        t = type(m)\n        if t is nn.Conv2d:\n            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n        elif t is nn.BatchNorm2d:\n            m.eps = 1e-4\n            m.momentum = 0.03\n        elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:\n            m.inplace = True\n\n\ndef model_info(model, verbose=False):\n    # Plots a line-by-line description of a PyTorch model\n    n_p = sum(x.numel() for x in model.parameters())  # number parameters\n    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients\n    if verbose:\n        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))\n        for i, (name, p) in enumerate(model.named_parameters()):\n            name = name.replace('module_list.', '')\n            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %\n                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))\n\n    try:  # FLOPS\n        from thop import profile\n        macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)\n        fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)\n    except:\n        fs = ''\n\n    print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))\n\n\nclass ModelEMA:\n    \"\"\" Model Exponential Moving Average 
from https://github.com/rwightman/pytorch-image-models\n    Keep a moving average of everything in the model state_dict (parameters and buffers).\n    This is intended to allow functionality like\n    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage\n    A smoothed version of the weights is necessary for some training schemes to perform well.\n    E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use\n    RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA\n    smoothing of weights to match results. Pay attention to the decay constant you are using\n    relative to your update count per epoch.\n    To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but\n    disable validation of the EMA weights. Validation will have to be done manually in a separate\n    process, or after the training stops converging.\n    This class is sensitive where it is initialized in the sequence of model init,\n    GPU assignment and distributed training wrappers.\n    I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU.\n    \"\"\"\n\n    def __init__(self, model, decay=0.9999, device=''):\n        # make a copy of the model for accumulating moving average of weights\n        self.ema = deepcopy(model)\n        self.ema.eval()\n        self.updates = 0  # number of EMA updates\n        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)\n        self.device = device  # perform ema on different device from model if set\n        if device:\n            self.ema.to(device=device)\n        for p in self.ema.parameters():\n            p.requires_grad_(False)\n\n    def update(self, model):\n        self.updates += 1\n        d = self.decay(self.updates)\n        with torch.no_grad():\n            if type(model) in (nn.parallel.DataParallel, 
nn.parallel.DistributedDataParallel):\n                msd, esd = model.module.state_dict(), self.ema.module.state_dict()\n            else:\n                msd, esd = model.state_dict(), self.ema.state_dict()\n\n            for k, v in esd.items():\n                if v.dtype.is_floating_point:\n                    v *= d\n                    v += (1. - d) * msd[k].detach()\n\n    def update_attr(self, model):\n        # Assign attributes (which may change during training)\n        for k in model.__dict__.keys():\n            if not k.startswith('_'):\n                setattr(self.ema, k, getattr(model, k))\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/build_utils/utils.py",
    "content": "import glob\nimport math\nimport os\nimport random\nimport time\n\nimport cv2\nimport matplotlib\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torchvision\nfrom tqdm import tqdm\n\nfrom build_utils import torch_utils  # , google_utils\n\n# Set printoptions\ntorch.set_printoptions(linewidth=320, precision=5, profile='long')\nnp.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5\nmatplotlib.rc('font', **{'size': 11})\n\n# Prevent OpenCV from multithreading (to use PyTorch DataLoader)\ncv2.setNumThreads(0)\n\n\ndef init_seeds(seed=0):\n    random.seed(seed)\n    np.random.seed(seed)\n    torch_utils.init_seeds(seed=seed)\n\n\ndef check_file(file):\n    # Searches for file if not found locally\n    if os.path.isfile(file):\n        return file\n    else:\n        files = glob.glob('./**/' + file, recursive=True)  # find file\n        assert len(files), 'File Not Found: %s' % file  # assert file was found\n        return files[0]  # return first file if multiple found\n\n\ndef xyxy2xywh(x):\n    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right\n    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)\n    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center\n    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center\n    y[:, 2] = x[:, 2] - x[:, 0]  # width\n    y[:, 3] = x[:, 3] - x[:, 1]  # height\n    return y\n\n\ndef xywh2xyxy(x):\n    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right\n    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)\n    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x\n    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y\n    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x\n    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y\n    return y\n\n\ndef scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):\n    \"\"\"\n    
将预测的坐标信息转换回原图尺度\n    :param img1_shape: 缩放后的图像尺度\n    :param coords: 预测的box信息\n    :param img0_shape: 缩放前的图像尺度\n    :param ratio_pad: 缩放过程中的缩放比例以及pad\n    :return:\n    \"\"\"\n    # Rescale coords (xyxy) from img1_shape to img0_shape\n    if ratio_pad is None:  # calculate from img0_shape\n        gain = max(img1_shape) / max(img0_shape)  # gain  = old / new\n        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding\n    else:\n        gain = ratio_pad[0][0]\n        pad = ratio_pad[1]\n\n    coords[:, [0, 2]] -= pad[0]  # x padding\n    coords[:, [1, 3]] -= pad[1]  # y padding\n    coords[:, :4] /= gain\n    clip_coords(coords, img0_shape)\n    return coords\n\n\ndef clip_coords(boxes, img_shape):\n    # Clip bounding xyxy bounding boxes to image shape (height, width)\n    boxes[:, 0].clamp_(0, img_shape[1])  # x1\n    boxes[:, 1].clamp_(0, img_shape[0])  # y1\n    boxes[:, 2].clamp_(0, img_shape[1])  # x2\n    boxes[:, 3].clamp_(0, img_shape[0])  # y2\n\n\ndef bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):\n    # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4\n    box2 = box2.t()\n\n    # Get the coordinates of bounding boxes\n    if x1y1x2y2:  # x1, y1, x2, y2 = box1\n        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]\n        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]\n    else:  # transform from xywh to xyxy\n        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2\n        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2\n        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2\n        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2\n\n    # Intersection area\n    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \\\n            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)\n\n    # Union Area\n    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1\n    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1\n    union = (w1 * h1 + 1e-16) + w2 * h2 - inter\n\n    iou = inter / union  # iou\n    if GIoU or DIoU or CIoU:\n        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width\n        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height\n        if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf\n            c_area = cw * ch + 1e-16  # convex area\n            return iou - (c_area - union) / c_area  # GIoU\n        if DIoU or CIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1\n            # convex diagonal squared\n            c2 = cw ** 2 + ch ** 2 + 1e-16\n            # centerpoint distance squared\n            rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4\n            if DIoU:\n                return iou - rho2 / c2  # DIoU\n            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47\n                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)\n                with 
torch.no_grad():\n                    alpha = v / (1 - iou + v)\n                return iou - (rho2 / c2 + v * alpha)  # CIoU\n\n    return iou\n\n\ndef box_iou(box1, box2):\n    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py\n    \"\"\"\n    Return intersection-over-union (Jaccard index) of boxes.\n    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.\n    Arguments:\n        box1 (Tensor[N, 4])\n        box2 (Tensor[M, 4])\n    Returns:\n        iou (Tensor[N, M]): the NxM matrix containing the pairwise\n            IoU values for every element in boxes1 and boxes2\n    \"\"\"\n\n    def box_area(box):\n        # box = 4xn\n        return (box[2] - box[0]) * (box[3] - box[1])\n\n    area1 = box_area(box1.t())\n    area2 = box_area(box2.t())\n\n    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)\n    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)\n    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)\n\n\ndef wh_iou(wh1, wh2):\n    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2\n    wh1 = wh1[:, None]  # [N,1,2]\n    wh2 = wh2[None]  # [1,M,2]\n    inter = torch.min(wh1, wh2).prod(2)  # [N,M]\n    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)\n\n\nclass FocalLoss(nn.Module):\n    # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)\n    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):\n        super(FocalLoss, self).__init__()\n        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()\n        self.gamma = gamma\n        self.alpha = alpha\n        self.reduction = loss_fcn.reduction\n        self.loss_fcn.reduction = 'none'  # required to apply FL to each element\n\n    def forward(self, pred, true):\n        loss = self.loss_fcn(pred, true)\n        # p_t = torch.exp(-loss)\n        # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability\n\n        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py\n        pred_prob = torch.sigmoid(pred)  # prob from logits\n        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)\n        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)\n        modulating_factor = (1.0 - p_t) ** self.gamma\n        loss *= alpha_factor * modulating_factor\n\n        if self.reduction == 'mean':\n            return loss.mean()\n        elif self.reduction == 'sum':\n            return loss.sum()\n        else:  # 'none'\n            return loss\n\n\ndef smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441\n    # return positive, negative label smoothing BCE targets\n    return 1.0 - 0.5 * eps, 0.5 * eps\n\n\ndef compute_loss(p, targets, model):  # predictions, targets, model\n    device = p[0].device\n    lcls = torch.zeros(1, device=device)  # Tensor(0)\n    lbox = torch.zeros(1, device=device)  # Tensor(0)\n    lobj = torch.zeros(1, device=device)  # Tensor(0)\n    tcls, tbox, indices, anchors = build_targets(p, targets, model)  # targets\n    h = model.hyp  # hyperparameters\n    red = 'mean'  # Loss reduction (sum or mean)\n\n    # Define criteria\n    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device), 
reduction=red)\n    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device), reduction=red)\n\n    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3\n    cp, cn = smooth_BCE(eps=0.0)\n\n    # focal loss\n    g = h['fl_gamma']  # focal loss gamma\n    if g > 0:\n        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)\n\n    # per output\n    for i, pi in enumerate(p):  # layer index, layer predictions\n        b, a, gj, gi = indices[i]  # image_idx, anchor_idx, grid_y, grid_x\n        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj\n\n        nb = b.shape[0]  # number of positive samples\n        if nb:\n            # 对应匹配到正样本的预测信息\n            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets\n\n            # GIoU\n            pxy = ps[:, :2].sigmoid()\n            pwh = ps[:, 2:4].exp().clamp(max=1E3) * anchors[i]\n            pbox = torch.cat((pxy, pwh), 1)  # predicted box\n            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou(prediction, target)\n            lbox += (1.0 - giou).mean()  # giou loss\n\n            # Obj\n            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)  # giou ratio\n\n            # Class\n            if model.nc > 1:  # cls loss (only if multiple classes)\n                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets\n                t[range(nb), tcls[i]] = cp\n                lcls += BCEcls(ps[:, 5:], t)  # BCE\n\n            # Append targets to text file\n            # with open('targets.txt', 'a') as file:\n            #     [file.write('%11.5g ' * 4 % tuple(x) + '\\n') for x in torch.cat((txy[i], twh[i]), 1)]\n\n        lobj += BCEobj(pi[..., 4], tobj)  # obj loss\n\n    # 乘上每种损失的对应权重\n    lbox *= h['giou']\n    lobj *= h['obj']\n    lcls *= h['cls']\n\n    # loss = lbox + lobj + lcls\n    return {\"box_loss\": lbox,\n            \"obj_loss\": lobj,\n     
       \"class_loss\": lcls}\n\n\ndef build_targets(p, targets, model):\n    # Build targets for compute_loss(), input targets(image_idx,class,x,y,w,h)\n    nt = targets.shape[0]\n    tcls, tbox, indices, anch = [], [], [], []\n    gain = torch.ones(6, device=targets.device).long()  # normalized to gridspace gain\n\n    multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)\n    for i, j in enumerate(model.yolo_layers):  # j: [89, 101, 113]\n        # 获取该yolo predictor对应的anchors\n        # 注意anchor_vec是anchors缩放到对应特征层上的尺度\n        anchors = model.module.module_list[j].anchor_vec if multi_gpu else model.module_list[j].anchor_vec\n        # p[i].shape: [batch_size, 3, grid_h, grid_w, num_params]\n        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain\n        na = anchors.shape[0]  # number of anchors\n        # [3] -> [3, 1] -> [3, nt]\n        at = torch.arange(na).view(na, 1).repeat(1, nt)  # anchor tensor, same as .repeat_interleave(nt)\n\n        # Match targets to anchors\n        a, t, offsets = [], targets * gain, 0\n        if nt:  # 如果存在target的话\n            # 通过计算anchor模板与所有target的wh_iou来匹配正样本\n            # j: [3, nt] , iou_t = 0.20\n            j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))\n            # t.repeat(na, 1, 1): [nt, 6] -> [3, nt, 6]\n            # 获取正样本对应的anchor模板与target信息\n            a, t = at[j], t.repeat(na, 1, 1)[j]  # filter\n\n        # Define\n        # long等于to(torch.int64), 数值向下取整\n        b, c = t[:, :2].long().T  # image_idx, class\n        gxy = t[:, 2:4]  # grid xy\n        gwh = t[:, 4:6]  # grid wh\n        gij = (gxy - offsets).long()  # 匹配targets所在的grid cell左上角坐标\n        gi, gj = gij.T  # grid xy indices\n\n        # Append\n        # gain[3]: grid_h, gain[2]: grid_w\n        # image_idx, anchor_idx, grid indices(y, x)\n        indices.append((b, a, gj.clamp_(0, gain[3]-1), gi.clamp_(0, gain[2]-1)))\n        
tbox.append(torch.cat((gxy - gij, gwh), 1))  # gt box相对anchor的x,y偏移量以及w,h\n        anch.append(anchors[a])  # anchors\n        tcls.append(c)  # class\n        if c.shape[0]:  # if any targets\n            # 目标的标签数值不能大于给定的目标类别数\n            assert c.max() < model.nc, 'Model accepts %g classes labeled from 0-%g, however you labelled a class %g. ' \\\n                                       'See https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data' % (\n                                           model.nc, model.nc - 1, c.max())\n\n    return tcls, tbox, indices, anch\n\n\ndef non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6,\n                        multi_label=True, classes=None, agnostic=False, max_num=100):\n    \"\"\"\n    Performs  Non-Maximum Suppression on inference results\n\n    param: prediction[batch, num_anchors, (num_classes+1+4) x num_anchors]\n    Returns detections with shape:\n        nx6 (x1, y1, x2, y2, conf, cls)\n    \"\"\"\n\n    # Settings\n    merge = False  # merge for best mAP\n    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height\n    time_limit = 10.0  # seconds to quit after\n\n    t = time.time()\n    nc = prediction[0].shape[1] - 5  # number of classes\n    multi_label &= nc > 1  # multiple labels per box\n    output = [None] * prediction.shape[0]\n    for xi, x in enumerate(prediction):  # image index, image inference 遍历每张图片\n        # Apply constraints\n        x = x[x[:, 4] > conf_thres]  # confidence 根据obj confidence虑除背景目标\n        x = x[((x[:, 2:4] > min_wh) & (x[:, 2:4] < max_wh)).all(1)]  # width-height 虑除小目标\n\n        # If none remain process next image\n        if not x.shape[0]:\n            continue\n\n        # Compute conf\n        x[..., 5:] *= x[..., 4:5]  # conf = obj_conf * cls_conf\n\n        # Box (center x, center y, width, height) to (x1, y1, x2, y2)\n        box = xywh2xyxy(x[:, :4])\n\n        # Detections matrix nx6 (xyxy, conf, cls)\n        if multi_label:  # 
针对每个类别执行非极大值抑制\n            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).t()\n            x = torch.cat((box[i], x[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)\n        else:  # best class only  直接针对每个类别中概率最大的类别进行非极大值抑制处理\n            conf, j = x[:, 5:].max(1)\n            x = torch.cat((box, conf.unsqueeze(1), j.float().unsqueeze(1)), 1)[conf > conf_thres]\n\n        # Filter by class\n        if classes:\n            x = x[(j.view(-1, 1) == torch.tensor(classes, device=j.device)).any(1)]\n\n        # Apply finite constraint\n        # if not torch.isfinite(x).all():\n        #     x = x[torch.isfinite(x).all(1)]\n\n        # If none remain process next image\n        n = x.shape[0]  # number of boxes\n        if not n:\n            continue\n\n        # Sort by confidence\n        # x = x[x[:, 4].argsort(descending=True)]\n\n        # Batched NMS\n        c = x[:, 5] * 0 if agnostic else x[:, 5]  # classes\n        boxes, scores = x[:, :4].clone() + c.view(-1, 1) * max_wh, x[:, 4]  # boxes (offset by class), scores\n        i = torchvision.ops.nms(boxes, scores, iou_thres)\n        i = i[:max_num]  # 最多只保留前max_num个目标信息\n        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)\n            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)\n                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix\n                weights = iou * scores[None]  # box weights\n                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes\n                # i = i[iou.sum(1) > 1]  # require redundancy\n            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139\n                print(x, i, x.shape, i.shape)\n                pass\n\n        output[xi] = x[i]\n        if (time.time() - t) > time_limit:\n            break  # time limit exceeded\n\n    return output\n\n\ndef get_yolo_layers(model):\n    bool_vec = [x['type'] == 'yolo' for 
x in model.module_defs]\n    return [i for i, x in enumerate(bool_vec) if x]  # [82, 94, 106] for yolov3\n\n\ndef kmean_anchors(path='./data/coco64.txt', n=9, img_size=(640, 640), thr=0.20, gen=1000):\n    # Creates kmeans anchors for use in *.cfg files: from build_utils.build_utils import *; _ = kmean_anchors()\n    # n: number of anchors\n    # img_size: (min, max) image size used for multi-scale training (can be same values)\n    # thr: IoU threshold hyperparameter used for training (0.0 - 1.0)\n    # gen: generations to evolve anchors using genetic algorithm\n    from build_utils.datasets import LoadImagesAndLabels\n\n    def print_results(k):\n        k = k[np.argsort(k.prod(1))]  # sort small to large\n        iou = wh_iou(wh, torch.Tensor(k))\n        max_iou = iou.max(1)[0]\n        bpr, aat = (max_iou > thr).float().mean(), (iou > thr).float().mean() * n  # best possible recall, anch > thr\n        print('%.2f iou_thr: %.3f best possible recall, %.2f anchors > thr' % (thr, bpr, aat))\n        print('n=%g, img_size=%s, IoU_all=%.3f/%.3f-mean/best, IoU>thr=%.3f-mean: ' %\n              (n, img_size, iou.mean(), max_iou.mean(), iou[iou > thr].mean()), end='')\n        for i, x in enumerate(k):\n            print('%i,%i' % (round(x[0]), round(x[1])), end=',  ' if i < len(k) - 1 else '\\n')  # use in *.cfg\n        return k\n\n    def fitness(k):  # mutation fitness\n        iou = wh_iou(wh, torch.Tensor(k))  # iou\n        max_iou = iou.max(1)[0]\n        return (max_iou * (max_iou > thr).float()).mean()  # product\n\n    # Get label wh\n    wh = []\n    dataset = LoadImagesAndLabels(path, augment=True, rect=True)\n    nr = 1 if img_size[0] == img_size[1] else 10  # number augmentation repetitions\n    for s, l in zip(dataset.shapes, dataset.labels):\n        wh.append(l[:, 3:5] * (s / s.max()))  # image normalized to letterbox normalized wh\n    wh = np.concatenate(wh, 0).repeat(nr, axis=0)  # augment 10x\n    wh *= np.random.uniform(img_size[0], img_size[1], 
size=(wh.shape[0], 1))  # normalized to pixels (multi-scale)\n    wh = wh[(wh > 2.0).all(1)]  # remove below threshold boxes (< 2 pixels wh)\n\n    # Kmeans calculation\n    from scipy.cluster.vq import kmeans\n    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))\n    s = wh.std(0)  # sigmas for whitening\n    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance\n    k *= s\n    wh = torch.Tensor(wh)\n    k = print_results(k)\n\n    # # Plot\n    # k, d = [None] * 20, [None] * 20\n    # for i in tqdm(range(1, 21)):\n    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance\n    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))\n    # ax = ax.ravel()\n    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')\n    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh\n    # ax[0].hist(wh[wh[:, 0]<100, 0],400)\n    # ax[1].hist(wh[wh[:, 1]<100, 1],400)\n    # fig.tight_layout()\n    # fig.savefig('wh.png', dpi=200)\n\n    # Evolve\n    npr = np.random\n    f, sh, mp, s = fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma\n    for _ in tqdm(range(gen), desc='Evolving anchors'):\n        v = np.ones(sh)\n        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)\n            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)\n        kg = (k.copy() * v).clip(min=2.0)\n        fg = fitness(kg)\n        if fg > f:\n            f, k = fg, kg.copy()\n            print_results(k)\n    k = print_results(k)\n\n    return k\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/calculate_dataset.py",
    "content": "\"\"\"\n该脚本有3个功能：\n1.统计训练集和验证集的数据并生成相应.txt文件\n2.创建data.data文件，记录classes个数, train以及val数据集文件(.txt)路径和label.names文件路径\n3.根据yolov3-spp.cfg创建my_yolov3.cfg文件修改其中的predictor filters以及yolo classes参数(这两个参数是根据类别数改变的)\n\"\"\"\nimport os\n\ntrain_annotation_dir = \"./my_yolo_dataset/train/labels\"\nval_annotation_dir = \"./my_yolo_dataset/val/labels\"\nclasses_label = \"./data/my_data_label.names\"\ncfg_path = \"./cfg/yolov3-spp.cfg\"\n\nassert os.path.exists(train_annotation_dir), \"train_annotation_dir not exist!\"\nassert os.path.exists(val_annotation_dir), \"val_annotation_dir not exist!\"\nassert os.path.exists(classes_label), \"classes_label not exist!\"\nassert os.path.exists(cfg_path), \"cfg_path not exist!\"\n\n\ndef calculate_data_txt(txt_path, dataset_dir):\n    # create my_data.txt file that record image list\n    with open(txt_path, \"w\") as w:\n        for file_name in os.listdir(dataset_dir):\n            if file_name == \"classes.txt\":\n                continue\n\n            img_path = os.path.join(dataset_dir.replace(\"labels\", \"images\"),\n                                    file_name.split(\".\")[0]) + \".jpg\"\n            line = img_path + \"\\n\"\n            assert os.path.exists(img_path), \"file:{} not exist!\".format(img_path)\n            w.write(line)\n\n\ndef create_data_data(create_data_path, label_path, train_path, val_path, classes_info):\n    # create my_data.data file that record classes, train, valid and names info.\n    # shutil.copyfile(label_path, \"./data/my_data_label.names\")\n    with open(create_data_path, \"w\") as w:\n        w.write(\"classes={}\".format(len(classes_info)) + \"\\n\")  # 记录类别个数\n        w.write(\"train={}\".format(train_path) + \"\\n\")           # 记录训练集对应txt文件路径\n        w.write(\"valid={}\".format(val_path) + \"\\n\")             # 记录验证集对应txt文件路径\n        w.write(\"names=data/my_data_label.names\" + \"\\n\")        # 记录label.names文件路径\n\n\ndef change_and_create_cfg_file(classes_info, 
save_cfg_path=\"./cfg/my_yolov3.cfg\"):\n    # create my_yolov3.cfg file changed predictor filters and yolo classes param.\n    # this operation only deal with yolov3-spp.cfg\n    filters_lines = [636, 722, 809]\n    classes_lines = [643, 729, 816]\n    cfg_lines = open(cfg_path, \"r\").readlines()\n\n    for i in filters_lines:\n        assert \"filters\" in cfg_lines[i-1], \"filters param is not in line:{}\".format(i-1)\n        output_num = (5 + len(classes_info)) * 3\n        cfg_lines[i-1] = \"filters={}\\n\".format(output_num)\n\n    for i in classes_lines:\n        assert \"classes\" in cfg_lines[i-1], \"classes param is not in line:{}\".format(i-1)\n        cfg_lines[i-1] = \"classes={}\\n\".format(len(classes_info))\n\n    with open(save_cfg_path, \"w\") as w:\n        w.writelines(cfg_lines)\n\n\ndef main():\n    # 统计训练集和验证集的数据并生成相应txt文件\n    train_txt_path = \"data/my_train_data.txt\"\n    val_txt_path = \"data/my_val_data.txt\"\n    calculate_data_txt(train_txt_path, train_annotation_dir)\n    calculate_data_txt(val_txt_path, val_annotation_dir)\n\n    classes_info = [line.strip() for line in open(classes_label, \"r\").readlines() if len(line.strip()) > 0]\n    # 创建data.data文件，记录classes个数, train以及val数据集文件(.txt)路径和label.names文件路径\n    create_data_data(\"./data/my_data.data\", classes_label, train_txt_path, val_txt_path, classes_info)\n\n    # 根据yolov3-spp.cfg创建my_yolov3.cfg文件修改其中的predictor filters以及yolo classes参数(这两个参数是根据类别数改变的)\n    change_and_create_cfg_file(classes_info)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/cfg/hyp.yaml",
    "content": "# Hyperparameters for training\n\ngiou: 3.54  # giou loss gain\ncls: 37.4  # cls loss gain\ncls_pw: 1.0  # cls BCELoss positive_weight\nobj: 64.3  # obj loss gain (*=img_size/320 if img_size != 320)\nobj_pw: 1.0  # obj BCELoss positive_weight\niou_t: 0.20  # iou training threshold\nlr0: 0.001  # initial learning rate (SGD=5E-3 Adam=5E-4)\nlrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)\nmomentum: 0.937  # SGD momentum\nweight_decay: 0.0005  # optimizer weight decay\nfl_gamma: 0.0  # focal loss gamma (efficientDet default is gamma=1.5)\nhsv_h: 0.0138  # image HSV-Hue augmentation (fraction)\nhsv_s: 0.678  # image HSV-Saturation augmentation (fraction)\nhsv_v: 0.36  # image HSV-Value augmentation (fraction)\ndegrees: 0.  # image rotation (+/- deg)\ntranslate: 0.  # image translation (+/- fraction)\nscale: 0.  # image scale (+/- gain)\nshear: 0.  # image shear (+/- deg)"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/cfg/yolov3-spp.cfg",
    "content": "[net]\n# Testing\n# batch=1\n# subdivisions=1\n# Training\nbatch=64         \nsubdivisions=16  \nwidth=608        \nheight=608       \nchannels=3       \nmomentum=0.9     \ndecay=0.0005     \nangle=0          \nsaturation = 1.5  \nexposure = 1.5 \nhue=.1    \n\nlearning_rate=0.001  \nburn_in=1000   \nmax_batches = 500200 \npolicy=steps  \nsteps=400000,450000 \nscales=.1,.1  \n\n[convolutional]\nbatch_normalize=1 \nfilters=32    \nsize=3      \nstride=1       \npad=1        \nactivation=leaky  \n\n# Downsample\n\n[convolutional]    \nbatch_normalize=1\nfilters=64\nsize=3\nstride=2          \npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=32\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=64\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]    \nfrom=-3      \nactivation=linear  \n\n# Downsample\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=3\nstride=2\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=64\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=64\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n# 
Downsample\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=2\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n# 
Downsample\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=2\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n# 
Downsample\n\n[convolutional]\nbatch_normalize=1\nfilters=1024\nsize=3\nstride=2\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=1024\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=1024\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=1024\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=1024\nsize=3\nstride=1\npad=1\nactivation=leaky\n\n[shortcut]\nfrom=-3\nactivation=linear\n\n######################\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=1024\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n### SPP ###\n[maxpool]\nstride=1\nsize=5\n\n[route]\nlayers=-2\n\n[maxpool]\nstride=1\nsize=9\n\n[route]\nlayers=-4\n\n[maxpool]\nstride=1\nsize=13\n\n[route]\nlayers=-1,-3,-5,-6\n\n### End SPP 
###\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=1024\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=512\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=1024\nactivation=leaky\n\n[convolutional]\nsize=1\nstride=1\npad=1\nfilters=255\nactivation=linear\n\n\n[yolo]\nmask = 6,7,8  \nanchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326\nclasses=80 \nnum=9\njitter=.3\nignore_thresh = .7\ntruth_thresh = 1\nrandom=1\n\n\n[route]\nlayers = -4\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[upsample]\nstride=2\n\n[route]\nlayers = -1, 61\n\n\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=512\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=512\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=256\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=512\nactivation=leaky\n\n[convolutional]\nsize=1\nstride=1\npad=1\nfilters=255\nactivation=linear\n\n\n[yolo]\nmask = 3,4,5\nanchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326\nclasses=80\nnum=9\njitter=.3\nignore_thresh = .7\ntruth_thresh = 1\nrandom=1\n\n\n\n[route]\nlayers = -4\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[upsample]\nstride=2\n\n[route]\nlayers = -1, 
36\n\n\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=256\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=256\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nfilters=128\nsize=1\nstride=1\npad=1\nactivation=leaky\n\n[convolutional]\nbatch_normalize=1\nsize=3\nstride=1\npad=1\nfilters=256\nactivation=leaky\n\n[convolutional]\nsize=1\nstride=1\npad=1\nfilters=255\nactivation=linear\n\n\n[yolo]\nmask = 0,1,2\nanchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326\nclasses=80\nnum=9\njitter=.3\nignore_thresh = .7\ntruth_thresh = 1\nrandom=1\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/draw_box_utils.py",
    "content": "from PIL.Image import Image, fromarray\nimport PIL.ImageDraw as ImageDraw\nimport PIL.ImageFont as ImageFont\nfrom PIL import ImageColor\nimport numpy as np\n\nSTANDARD_COLORS = [\n    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',\n    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',\n    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',\n    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',\n    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',\n    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',\n    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',\n    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',\n    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',\n    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',\n    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',\n    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',\n    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',\n    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',\n    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',\n    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',\n    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',\n    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',\n    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',\n    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',\n    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',\n    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',\n    'WhiteSmoke', 'Yellow', 'YellowGreen'\n]\n\n\ndef draw_text(draw,\n              
box: list,\n              cls: int,\n              score: float,\n              category_index: dict,\n              color: str,\n              font: str = 'arial.ttf',\n              font_size: int = 24):\n    \"\"\"\n    将目标边界框和类别信息绘制到图片上\n    \"\"\"\n    try:\n        font = ImageFont.truetype(font, font_size)\n    except IOError:\n        font = ImageFont.load_default()\n\n    left, top, right, bottom = box\n    # If the total height of the display strings added to the top of the bounding\n    # box exceeds the top of the image, stack the strings below the bounding box\n    # instead of above.\n    display_str = f\"{category_index[str(cls)]}: {int(100 * score)}%\"\n    display_str_heights = [font.getsize(ds)[1] for ds in display_str]\n    # Each display_str has a top and bottom margin of 0.05x.\n    display_str_height = (1 + 2 * 0.05) * max(display_str_heights)\n\n    if top > display_str_height:\n        text_top = top - display_str_height\n        text_bottom = top\n    else:\n        text_top = bottom\n        text_bottom = bottom + display_str_height\n\n    for ds in display_str:\n        text_width, text_height = font.getsize(ds)\n        margin = np.ceil(0.05 * text_width)\n        draw.rectangle([(left, text_top),\n                        (left + text_width + 2 * margin, text_bottom)], fill=color)\n        draw.text((left + margin, text_top),\n                  ds,\n                  fill='black',\n                  font=font)\n        left += text_width\n\n\ndef draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):\n    np_image = np.array(image)\n    masks = np.where(masks > thresh, True, False)\n\n    # colors = np.array(colors)\n    img_to_draw = np.copy(np_image)\n    # TODO: There might be a way to vectorize this\n    for mask, color in zip(masks, colors):\n        img_to_draw[mask] = color\n\n    out = np_image * (1 - alpha) + img_to_draw * alpha\n    return fromarray(out.astype(np.uint8))\n\n\ndef draw_objs(image: Image,\n    
          boxes: np.ndarray = None,\n              classes: np.ndarray = None,\n              scores: np.ndarray = None,\n              masks: np.ndarray = None,\n              category_index: dict = None,\n              box_thresh: float = 0.1,\n              mask_thresh: float = 0.5,\n              line_thickness: int = 8,\n              font: str = 'arial.ttf',\n              font_size: int = 24,\n              draw_boxes_on_image: bool = True,\n              draw_masks_on_image: bool = False):\n    \"\"\"\n    将目标边界框信息，类别信息，mask信息绘制在图片上\n    Args:\n        image: 需要绘制的图片\n        boxes: 目标边界框信息\n        classes: 目标类别信息\n        scores: 目标概率信息\n        masks: 目标mask信息\n        category_index: 类别与名称字典\n        box_thresh: 过滤的概率阈值\n        mask_thresh:\n        line_thickness: 边界框宽度\n        font: 字体类型\n        font_size: 字体大小\n        draw_boxes_on_image:\n        draw_masks_on_image:\n\n    Returns:\n\n    \"\"\"\n\n    # 过滤掉低概率的目标\n    idxs = np.greater(scores, box_thresh)\n    boxes = boxes[idxs]\n    classes = classes[idxs]\n    scores = scores[idxs]\n    if masks is not None:\n        masks = masks[idxs]\n    if len(boxes) == 0:\n        return image\n\n    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]\n\n    if draw_boxes_on_image:\n        # Draw all boxes onto image.\n        draw = ImageDraw.Draw(image)\n        for box, cls, score, color in zip(boxes, classes, scores, colors):\n            left, top, right, bottom = box\n            # 绘制目标边界框\n            draw.line([(left, top), (left, bottom), (right, bottom),\n                       (right, top), (left, top)], width=line_thickness, fill=color)\n            # 绘制类别和概率信息\n            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)\n\n    if draw_masks_on_image and (masks is not None):\n        # Draw all mask onto image.\n        image = draw_masks(image, masks, colors, mask_thresh)\n\n    return image\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/export_onnx.py",
    "content": "import os\nimport torch\nimport cv2\nimport torch.onnx\nimport onnx\nimport onnxruntime\nimport numpy as np\nimport models\nfrom build_utils import img_utils\n\ndevice = torch.device(\"cpu\")\nmodels.ONNX_EXPORT = True\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef main():\n    img_size = 512  # 必须是32的整数倍 [416, 512, 608]\n    cfg = \"cfg/yolov3-spp.cfg\"\n    weights = \"weights/yolov3-spp-ultralytics-{}.pt\".format(img_size)\n    assert os.path.exists(cfg), \"cfg file does not exist...\"\n    assert os.path.exists(weights), \"weights file does not exist...\"\n\n    input_size = (img_size, img_size)  # [h, w]\n\n    # create model\n    model = models.Darknet(cfg, input_size)\n    # load model weights\n    model.load_state_dict(torch.load(weights, map_location=device)[\"model\"])\n    model.to(device)\n    model.eval()\n    # input to the model\n    # [batch, channel, height, width]\n    # x = torch.rand(1, 3, *input_size, requires_grad=True)\n    img_path = \"test.jpg\"\n    img_o = cv2.imread(img_path)  # BGR\n    assert img_o is not None, \"Image Not Found \" + img_path\n\n    # preprocessing img\n    img = img_utils.letterbox(img_o, new_shape=input_size, auto=False, color=(0, 0, 0))[0]\n    # Convert\n    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416\n    img = np.ascontiguousarray(img).astype(np.float32)\n\n    img /= 255.0  # scale (0, 255) to (0, 1)\n    img = np.expand_dims(img, axis=0)  # add batch dimension\n    x = torch.tensor(img)\n    torch_out = model(x)\n\n    save_path = \"yolov3spp.onnx\"\n    # export the model\n    torch.onnx.export(model,                       # model being run\n                      x,                           # model input (or a tuple for multiple inputs)\n                      save_path,                   # where to save the model (can be a file or file-like object)\n                      
export_params=True,          # store the trained parameter weights inside the model file\n                      opset_version=12,            # the ONNX version to export the model to\n                      do_constant_folding=True,    # whether to execute constant folding for optimization\n                      input_names=[\"images\"],       # the model's input names\n                      # output_names=[\"classes\", \"boxes\"],     # the model's output names\n                      output_names=[\"prediction\"],\n                      dynamic_axes={\"images\": {0: \"batch_size\"},  # variable length axes\n                                    \"prediction\": {0: \"batch_size\"}})\n                                    # \"classes\": {0: \"batch_size\"},\n                                    # \"confidence\": {0: \"batch_size\"},\n                                    # \"boxes\": {0: \"batch_size\"}})\n\n    # check onnx model\n    onnx_model = onnx.load(save_path)\n    onnx.checker.check_model(onnx_model)\n    # print(onnx.helper.printable_graph(onnx_model.graph))\n\n    ort_session = onnxruntime.InferenceSession(save_path)\n\n    # compute ONNX Runtime output prediction\n    ort_inputs = {\"images\": to_numpy(x)}\n    ort_outs = ort_session.run(None, ort_inputs)\n\n    # compare ONNX Runtime and Pytorch results\n    # assert_allclose: Raises an AssertionError if two objects are not equal up to desired tolerance.\n    np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)\n    # np.testing.assert_allclose(to_numpy(torch_out[1]), ort_outs[1], rtol=1e-03, atol=1e-05)\n    # np.testing.assert_allclose(to_numpy(torch_out[2]), ort_outs[2], rtol=1e-03, atol=1e-05)\n    print(\"Exported model has been tested with ONNXRuntime, and the result looks good!\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/load_onnx_test.py",
    "content": "import time\nimport cv2\nimport onnx\nimport onnxruntime\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom draw_box_utils import draw_box\n\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\n\ndef scale_img(img: np.ndarray,\n              new_shape=(416, 416),\n              color=(114, 114, 114),\n              auto=True,\n              scale_fill=False,\n              scale_up=True):\n    \"\"\"\n    将图片缩放调整到指定大小，若需要填充，均匀填充到上下左右侧\n    :param img: 输入的图像numpy格式\n    :param new_shape: 输入网络的shape\n    :param color: padding用什么颜色填充\n    :param auto: 将输入网络的较小边长调整到最近的64整数倍(输入图像的比例不变)，这样输入网络的尺寸比指定尺寸要小，计算量也会减小\n    :param scale_fill: 简单粗暴缩放到指定大小\n    :param scale_up:  只缩小，不放大\n    :return:\n    \"\"\"\n\n    shape = img.shape[:2]  # [h, w]\n    if isinstance(new_shape, int):\n        new_shape = (new_shape, new_shape)\n\n    # scale ratio (new / old)\n    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n    if not scale_up:  # only scale down, do not scale up (for better test mAP) 对于大于指定输入大小的图片进行缩放,小于的不变\n        r = min(r, 1.0)\n\n    # compute padding\n    ratio = r, r  # width, height ratios\n    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n    if auto:  # minimun rectangle 保证原图比例不变，将图像最大边缩放到指定大小\n        # 这里的取余操作可以保证padding后的图片是32的整数倍(416x416)，如果是(512x512)可以保证是64的整数倍\n        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding\n    elif scale_fill:  # stretch 简单粗暴的将图片缩放到指定尺寸\n        dw, dh = 0, 0\n        new_unpad = new_shape\n        ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # wh ratios\n\n    dw /= 2  # divide padding into 2 sides 将padding分到上下，左右两侧\n    dh /= 2\n\n    # shape:[h, w]  new_unpad:[w, h]\n    if shape[::-1] != new_unpad:\n        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)\n    top, bottom = int(round(dh 
- 0.1)), int(round(dh + 0.1))  # 计算上下两侧的padding\n    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))  # 计算左右两侧的padding\n\n    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n    return img, ratio, (dw, dh)\n\n\ndef clip_coords(boxes: np.ndarray, img_shape: tuple):\n    # Clip bounding xyxy bounding boxes to image shape (height, width)\n    boxes[:, 0].clip(0, img_shape[1])  # x1\n    boxes[:, 1].clip(0, img_shape[0])  # y1\n    boxes[:, 2].clip(0, img_shape[1])  # x2\n    boxes[:, 3].clip(0, img_shape[0])  # y2\n\n\ndef turn_back_coords(img1_shape, coords, img0_shape, ratio_pad=None):\n    \"\"\"\n    将预测的坐标信息转换回原图尺度\n    :param img1_shape: 缩放后的图像尺度\n    :param coords: 预测的box信息\n    :param img0_shape: 缩放前的图像尺度\n    :param ratio_pad: 缩放过程中的缩放比例以及pad\n    :return:\n    \"\"\"\n    # Rescale coords (xyxy) from img1_shape to img0_shape\n    if ratio_pad is None:  # calculate from img0_shape\n        gain = max(img1_shape) / max(img0_shape)  # gain  = old / new\n        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding\n    else:\n        gain = ratio_pad[0][0]\n        pad = ratio_pad[1]\n\n    coords[:, [0, 2]] -= pad[0]  # x padding\n    coords[:, [1, 3]] -= pad[1]  # y padding\n    coords[:, :4] /= gain\n    clip_coords(coords, img0_shape)\n    return coords\n\n\ndef xywh2xyxy(x: np.ndarray):\n    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right\n    y = np.zeros_like(x)\n    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x\n    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y\n    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x\n    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y\n    return y\n\n\ndef bboxes_iou(boxes1: np.ndarray, boxes2: np.ndarray) -> np.ndarray:\n    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])\n    boxes2_area = (boxes2[..., 2] - 
boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])\n\n    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])\n    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])\n\n    inter_section = np.maximum(right_down - left_up, 0.0)\n    inter_area = inter_section[..., 0] * inter_section[..., 1]\n    union_area = boxes1_area + boxes2_area - inter_area\n    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)\n\n    return ious\n\n\ndef nms(bboxes: np.ndarray, iou_threshold=0.5, soft_threshold=0.3, sigma=0.5, method=\"nms\", ) -> np.ndarray:\n    \"\"\"\n    单独对一个类别进行NMS处理\n    :param bboxes: [x1, y1, x2, y2, score]\n    :param iou_threshold: nms算法中使用到的阈值\n    :param soft_threshold: soft-nms算法中使用到的阈值\n    :param sigma: soft-nms gaussian sigma\n    :param method: nms或者soft-nms\n    :return: 返回保留目标的索引\n    \"\"\"\n    assert method in [\"nms\", \"soft-nms\"]\n    # [x1, y1, x2, y2, score] -> [x1, y1, x2, y2, score, index]\n    bboxes = np.concatenate([bboxes, np.arange(bboxes.shape[0]).reshape(-1, 1)], axis=1)\n\n    best_bboxes_index = []\n    while len(bboxes) > 0:\n        max_ind = np.argmax(bboxes[:, 4])  # 寻找概率最大目标索引\n        best_bbox = bboxes[max_ind]\n        best_bboxes_index.append(best_bbox[5])\n        bboxes = np.concatenate([bboxes[:max_ind], bboxes[max_ind + 1:]])  # 将最大概率目标去除\n        ious = bboxes_iou(best_bbox[np.newaxis, :4], bboxes[:, :4])\n\n        if method == \"nms\":\n            iou_mask = np.less(ious, iou_threshold)  # <\n        else:  # soft-nms\n            weight = np.exp(-(np.square(ious) / sigma))\n            bboxes[:, 4] = bboxes[:, 4] * weight\n            iou_mask = np.greater(bboxes[:, 4], soft_threshold)  # >\n\n        bboxes = bboxes[iou_mask]\n\n    return np.array(best_bboxes_index, dtype=np.int32)\n\n\ndef post_process(pred: np.ndarray, multi_label=False, conf_thres=0.3):\n    \"\"\"\n    输入的xywh都是归一化后的值\n    :param pred: [num_obj, [x1, y1, x2, y2, objectness, cls1, cls1...]]\n    :param 
img_size:\n    :param multi_label:\n    :param conf_thres:\n    :return:\n    \"\"\"\n    min_wh, max_wh = 2, 4096\n    pred = pred[pred[:, 4] > conf_thres]  # 虑除小objectness目标\n    pred = pred[((pred[:, 2:4] > min_wh) & (pred[:, 2:4] < max_wh)).all(1)]  # 虑除规定尺度范围外的目标\n\n    if pred.shape[0] == 0:\n        return np.empty((0, 6))  # [x, y, x, y, score, class]\n\n    box = xywh2xyxy(pred[:, :4])\n    # Detections matrix nx6 (xyxy, conf, cls)\n    if multi_label:  # 针对每个类别执行非极大值抑制\n        # i, j = (x[:, 5:] > conf_thres).nonzero().t()\n        # x = torch.cat((box[i], x[i, j + 5].unsqueeze(1), j.float().unsqueeze(1)), 1)\n        pass\n    else:  # best class only  直接针对每个类别中概率最大的类别进行非极大值抑制处理\n        objectness = pred[:, 5:]\n        class_index = np.argmax(objectness, axis=1)\n        conf = objectness[(np.arange(pred.shape[0]), class_index)]\n        # conf, j = predictions[:, 5:].max(1)\n        pred = np.concatenate((box,\n                               np.expand_dims(conf, axis=1),\n                               np.expand_dims(class_index, axis=1)), 1)[conf > conf_thres]\n\n    n = pred.shape[0]  # number of boxes\n    if n == 0:\n        return np.empty((0, 6))  # [x, y, x, y, score, class]\n\n    cls = pred[:, 5]  # classes\n    boxes, scores = pred[:, :4] + cls.reshape(-1, 1) * max_wh, pred[:, 4:5]\n    t1 = time.time()\n    indexes = nms(np.concatenate([boxes, scores], axis=1))\n    print(\"NMS time is {}\".format(time.time() - t1))\n    pred = pred[indexes]\n\n    return pred\n\n\ndef main():\n    img_size = 512\n    save_path = \"yolov3spp.onnx\"\n    img_path = \"test.jpg\"\n    input_size = (img_size, img_size)  # h, w\n\n    # check onnx model\n    onnx_model = onnx.load(save_path)\n    onnx.checker.check_model(onnx_model)\n    # print(onnx.helper.printable_graph(onnx_model.graph))\n    ort_session = onnxruntime.InferenceSession(save_path)\n\n    img_o = cv2.imread(img_path)  # BGR\n    assert img_o is not None, \"Image Not Found \" + img_path\n\n    
# preprocessing img\n    img, ratio, pad = scale_img(img_o, new_shape=input_size, auto=False, color=(0, 0, 0))\n    # Convert\n    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416\n    img = np.ascontiguousarray(img).astype(np.float32)\n\n    img /= 255.0  # scale (0, 255) to (0, 1)\n    img = np.expand_dims(img, axis=0)  # add batch dimension\n\n    # compute ONNX Runtime output prediction\n    ort_inputs = {\"images\": img}\n\n    t1 = time.time()\n    # prediction: [num_obj, 85]\n    pred = ort_session.run(None, ort_inputs)[0]\n    t2 = time.time()\n    print(t2 - t1)\n    # print(predictions.shape[0])\n    # process detections\n    # 这里预测的数值是相对坐标(0-1之间)，乘上图像尺寸转回绝对坐标\n    pred[:, [0, 2]] *= input_size[1]\n    pred[:, [1, 3]] *= input_size[0]\n    pred = post_process(pred)\n\n    # 将预测的bbox缩放回原图像尺度\n    p_boxes = turn_back_coords(img1_shape=img.shape[2:],\n                               coords=pred[:, :4],\n                               img0_shape=img_o.shape,\n                               ratio_pad=[ratio, pad]).round()\n    # print(p_boxes.shape)\n\n    bboxes = p_boxes\n    scores = pred[:, 4]\n    classes = pred[:, 5].astype(np.int) + 1\n\n    category_index = dict([(i + 1, str(i + 1)) for i in range(90)])\n    img_o = draw_box(img_o[:, :, ::-1], bboxes, classes, scores, category_index)\n    plt.imshow(img_o)\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/models.py",
    "content": "from build_utils.layers import *\nfrom build_utils.parse_config import *\n\nONNX_EXPORT = False\n\n\ndef create_modules(modules_defs: list, img_size):\n    \"\"\"\n    Constructs module list of layer blocks from module configuration in module_defs\n    :param modules_defs: 通过.cfg文件解析得到的每个层结构的列表\n    :param img_size:\n    :return:\n    \"\"\"\n\n    img_size = [img_size] * 2 if isinstance(img_size, int) else img_size\n    # 删除解析cfg列表中的第一个配置(对应[net]的配置)\n    modules_defs.pop(0)  # cfg training hyperparams (unused)\n    output_filters = [3]  # input channels\n    module_list = nn.ModuleList()\n    # 统计哪些特征层的输出会被后续的层使用到(可能是特征融合，也可能是拼接)\n    routs = []  # list of layers which rout to deeper layers\n    yolo_index = -1\n\n    # 遍历搭建每个层结构\n    for i, mdef in enumerate(modules_defs):\n        modules = nn.Sequential()\n\n        if mdef[\"type\"] == \"convolutional\":\n            bn = mdef[\"batch_normalize\"]  # 1 or 0 / use or not\n            filters = mdef[\"filters\"]\n            k = mdef[\"size\"]  # kernel size\n            stride = mdef[\"stride\"] if \"stride\" in mdef else (mdef['stride_y'], mdef[\"stride_x\"])\n            if isinstance(k, int):\n                modules.add_module(\"Conv2d\", nn.Conv2d(in_channels=output_filters[-1],\n                                                       out_channels=filters,\n                                                       kernel_size=k,\n                                                       stride=stride,\n                                                       padding=k // 2 if mdef[\"pad\"] else 0,\n                                                       bias=not bn))\n            else:\n                raise TypeError(\"conv2d filter size must be int type.\")\n\n            if bn:\n                modules.add_module(\"BatchNorm2d\", nn.BatchNorm2d(filters))\n            else:\n                # 如果该卷积操作没有bn层，意味着该层为yolo的predictor\n                routs.append(i)  # detection output (goes into yolo 
layer)\n\n            if mdef[\"activation\"] == \"leaky\":\n                modules.add_module(\"activation\", nn.LeakyReLU(0.1, inplace=True))\n            else:\n                pass\n\n        elif mdef[\"type\"] == \"BatchNorm2d\":\n            pass\n\n        elif mdef[\"type\"] == \"maxpool\":\n            k = mdef[\"size\"]  # kernel size\n            stride = mdef[\"stride\"]\n            modules = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2)\n\n        elif mdef[\"type\"] == \"upsample\":\n            if ONNX_EXPORT:  # explicitly state size, avoid scale_factor\n                g = (yolo_index + 1) * 2 / 32  # gain\n                modules = nn.Upsample(size=tuple(int(x * g) for x in img_size))\n            else:\n                modules = nn.Upsample(scale_factor=mdef[\"stride\"])\n\n        elif mdef[\"type\"] == \"route\":  # [-2],  [-1,-3,-5,-6], [-1, 61]\n            layers = mdef[\"layers\"]\n            filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])\n            routs.extend([i + l if l < 0 else l for l in layers])\n            modules = FeatureConcat(layers=layers)\n\n        elif mdef[\"type\"] == \"shortcut\":\n            layers = mdef[\"from\"]\n            filters = output_filters[-1]\n            # routs.extend([i + l if l < 0 else l for l in layers])\n            routs.append(i + layers[0])\n            modules = WeightedFeatureFusion(layers=layers, weight=\"weights_type\" in mdef)\n\n        elif mdef[\"type\"] == \"yolo\":\n            yolo_index += 1  # 记录是第几个yolo_layer [0, 1, 2]\n            stride = [32, 16, 8]  # 预测特征层对应原图的缩放比例\n\n            modules = YOLOLayer(anchors=mdef[\"anchors\"][mdef[\"mask\"]],  # anchor list\n                                nc=mdef[\"classes\"],  # number of classes\n                                img_size=img_size,\n                                stride=stride[yolo_index])\n\n            # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf 
section 3.3)\n            try:\n                j = -1\n                # bias: shape(255,) 索引0对应Sequential中的Conv2d\n                # view: shape(3, 85)\n                b = module_list[j][0].bias.view(modules.na, -1)\n                b.data[:, 4] += -4.5  # obj\n                b.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99))  # cls (sigmoid(p) = 1/nc)\n                module_list[j][0].bias = torch.nn.Parameter(b.view(-1), requires_grad=True)\n            except Exception as e:\n                print('WARNING: smart bias initialization failure.', e)\n        else:\n            print(\"Warning: Unrecognized Layer Type: \" + mdef[\"type\"])\n\n        # Register module list and number of output filters\n        module_list.append(modules)\n        output_filters.append(filters)\n\n    routs_binary = [False] * len(modules_defs)\n    for i in routs:\n        routs_binary[i] = True\n    return module_list, routs_binary\n\n\nclass YOLOLayer(nn.Module):\n    \"\"\"\n    对YOLO的输出进行处理\n    \"\"\"\n    def __init__(self, anchors, nc, img_size, stride):\n        super(YOLOLayer, self).__init__()\n        self.anchors = torch.Tensor(anchors)\n        self.stride = stride  # layer stride 特征图上一步对应原图上的步距 [32, 16, 8]\n        self.na = len(anchors)  # number of anchors (3)\n        self.nc = nc  # number of classes (80)\n        self.no = nc + 5  # number of outputs (85: x, y, w, h, obj, cls1, ...)\n        self.nx, self.ny, self.ng = 0, 0, (0, 0)  # initialize number of x, y gridpoints\n        # 将anchors大小缩放到grid尺度\n        self.anchor_vec = self.anchors / self.stride\n        # batch_size, na, grid_h, grid_w, wh,\n        # 值为1的维度对应的值不是固定值，后续操作可根据broadcast广播机制自动扩充\n        self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)\n        self.grid = None\n\n        if ONNX_EXPORT:\n            self.training = False\n            self.create_grids((img_size[1] // stride, img_size[0] // stride))  # number x, y grid points\n\n    def create_grids(self, ng=(13, 13), 
device=\"cpu\"):\n        \"\"\"\n        更新grids信息并生成新的grids参数\n        :param ng: 特征图大小\n        :param device:\n        :return:\n        \"\"\"\n        self.nx, self.ny = ng\n        self.ng = torch.tensor(ng, dtype=torch.float)\n\n        # build xy offsets 构建每个cell处的anchor的xy偏移量(在feature map上的)\n        if not self.training:  # 训练模式不需要回归到最终预测boxes\n            yv, xv = torch.meshgrid([torch.arange(self.ny, device=device),\n                                     torch.arange(self.nx, device=device)])\n            # batch_size, na, grid_h, grid_w, wh\n            self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()\n\n        if self.anchor_vec.device != device:\n            self.anchor_vec = self.anchor_vec.to(device)\n            self.anchor_wh = self.anchor_wh.to(device)\n\n    def forward(self, p):\n        if ONNX_EXPORT:\n            bs = 1  # batch size\n        else:\n            bs, _, ny, nx = p.shape  # batch_size, predict_param(255), grid(13), grid(13)\n            if (self.nx, self.ny) != (nx, ny) or self.grid is None:  # fix no grid bug\n                self.create_grids((nx, ny), p.device)\n\n        # view: (batch_size, 255, 13, 13) -> (batch_size, 3, 85, 13, 13)\n        # permute: (batch_size, 3, 85, 13, 13) -> (batch_size, 3, 13, 13, 85)\n        # [bs, anchor, grid, grid, xywh + obj + classes]\n        p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction\n\n        if self.training:\n            return p\n        elif ONNX_EXPORT:\n            # Avoid broadcasting for ANE operations\n            m = self.na * self.nx * self.ny  # 3*\n            ng = 1. 
/ self.ng.repeat(m, 1)\n            grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)\n            anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng\n\n            p = p.view(m, self.no)\n            # xy = torch.sigmoid(p[:, 0:2]) + grid  # x, y\n            # wh = torch.exp(p[:, 2:4]) * anchor_wh  # width, height\n            # p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \\\n            #     torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5])  # conf\n            p[:, :2] = (torch.sigmoid(p[:, 0:2]) + grid) * ng  # x, y\n            p[:, 2:4] = torch.exp(p[:, 2:4]) * anchor_wh  # width, height\n            p[:, 4:] = torch.sigmoid(p[:, 4:])\n            p[:, 5:] = p[:, 5:self.no] * p[:, 4:5]\n            return p\n        else:  # inference\n            # [bs, anchor, grid, grid, xywh + obj + classes]\n            io = p.clone()  # inference output\n            io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid  # xy 计算在feature map上的xy坐标\n            io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method 计算在feature map上的wh\n            io[..., :4] *= self.stride  # 换算映射回原图尺度\n            torch.sigmoid_(io[..., 4:])\n            return io.view(bs, -1, self.no), p  # view [1, 3, 13, 13, 85] as [1, 507, 85]\n\n\nclass Darknet(nn.Module):\n    \"\"\"\n    YOLOv3 spp object detection model\n    \"\"\"\n    def __init__(self, cfg, img_size=(416, 416), verbose=False):\n        super(Darknet, self).__init__()\n        # 这里传入的img_size只在导出ONNX模型时起作用\n        self.input_size = [img_size] * 2 if isinstance(img_size, int) else img_size\n        # 解析网络对应的.cfg文件\n        self.module_defs = parse_model_cfg(cfg)\n        # 根据解析的网络结构一层一层去搭建\n        self.module_list, self.routs = create_modules(self.module_defs, img_size)\n        # 获取所有YOLOLayer层的索引\n        self.yolo_layers = get_yolo_layers(self)\n\n        # 打印下模型的信息，如果verbose为True则打印详细信息\n        self.info(verbose) if not ONNX_EXPORT else None  # print 
model description\n\n    def forward(self, x, verbose=False):\n        return self.forward_once(x, verbose=verbose)\n\n    def forward_once(self, x, verbose=False):\n        # yolo_out收集每个yolo_layer层的输出\n        # out收集每个模块的输出\n        yolo_out, out = [], []\n        if verbose:\n            print('0', x.shape)\n            str = \"\"\n\n        for i, module in enumerate(self.module_list):\n            name = module.__class__.__name__\n            if name in [\"WeightedFeatureFusion\", \"FeatureConcat\"]:  # sum, concat\n                if verbose:\n                    l = [i - 1] + module.layers  # layers\n                    sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers]  # shapes\n                    str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)])\n                x = module(x, out)  # WeightedFeatureFusion(), FeatureConcat()\n            elif name == \"YOLOLayer\":\n                yolo_out.append(module(x))\n            else:  # run module directly, i.e. 
mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc.\n                x = module(x)\n\n            out.append(x if self.routs[i] else [])\n            if verbose:\n                print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str)\n                str = ''\n\n        if self.training:  # train\n            return yolo_out\n        elif ONNX_EXPORT:  # export\n            # x = [torch.cat(x, 0) for x in zip(*yolo_out)]\n            # return x[0], torch.cat(x[1:3], 1)  # scores, boxes: 3780x80, 3780x4\n            p = torch.cat(yolo_out, dim=0)\n\n            # # 根据objectness虑除低概率目标\n            # mask = torch.nonzero(torch.gt(p[:, 4], 0.1), as_tuple=False).squeeze(1)\n            # # onnx不支持超过一维的索引（pytorch太灵活了）\n            # # p = p[mask]\n            # p = torch.index_select(p, dim=0, index=mask)\n            #\n            # # 虑除小面积目标，w > 2 and h > 2 pixel\n            # # ONNX暂不支持bitwise_and和all操作\n            # mask_s = torch.gt(p[:, 2], 2./self.input_size[0]) & torch.gt(p[:, 3], 2./self.input_size[1])\n            # mask_s = torch.nonzero(mask_s, as_tuple=False).squeeze(1)\n            # p = torch.index_select(p, dim=0, index=mask_s)  # width-height 虑除小目标\n            #\n            # if mask_s.numel() == 0:\n            #     return torch.empty([0, 85])\n\n            return p\n        else:  # inference or test\n            x, p = zip(*yolo_out)  # inference output, training output\n            x = torch.cat(x, 1)  # cat yolo outputs\n\n            return x, p\n\n    def info(self, verbose=False):\n        \"\"\"\n        打印模型的信息\n        :param verbose:\n        :return:\n        \"\"\"\n        torch_utils.model_info(self, verbose)\n\n\ndef get_yolo_layers(self):\n    \"\"\"\n    获取网络中三个\"YOLOLayer\"模块对应的索引\n    :param self:\n    :return:\n    \"\"\"\n    return [i for i, m in enumerate(self.module_list) if m.__class__.__name__ == 'YOLOLayer']  # [89, 101, 113]\n\n\n\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/predict_test.py",
    "content": "import os\nimport json\nimport time\n\nimport torch\nimport cv2\nimport numpy as np\nfrom matplotlib import pyplot as plt\nfrom PIL import Image\n\nfrom build_utils import img_utils, torch_utils, utils\nfrom models import Darknet\nfrom draw_box_utils import draw_objs\n\n\ndef main():\n    img_size = 512  # 必须是32的整数倍 [416, 512, 608]\n    cfg = \"cfg/my_yolov3.cfg\"  # 改成生成的.cfg文件\n    weights_path = \"weights/yolov3spp-voc-512.pt\"  # 改成自己训练好的权重文件\n    json_path = \"./data/pascal_voc_classes.json\"  # json标签文件\n    img_path = \"test.jpg\"\n    assert os.path.exists(cfg), \"cfg file {} dose not exist.\".format(cfg)\n    assert os.path.exists(weights), \"weights file {} dose not exist.\".format(weights)\n    assert os.path.exists(json_path), \"json file {} dose not exist.\".format(json_path)\n    assert os.path.exists(img_path), \"image file {} dose not exist.\".format(img_path)\n\n    with open(json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {str(v): str(k) for k, v in class_dict.items()}\n\n    input_size = (img_size, img_size)\n\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    model = Darknet(cfg, img_size)\n    weights_dict = torch.load(weights_path, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    model.eval()\n    with torch.no_grad():\n        # init\n        img = torch.zeros((1, 3, img_size, img_size), device=device)\n        model(img)\n\n        img_o = cv2.imread(img_path)  # BGR\n        assert img_o is not None, \"Image Not Found \" + img_path\n\n        img = img_utils.letterbox(img_o, new_shape=input_size, auto=True, color=(0, 0, 0))[0]\n        # Convert\n        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416\n        img = np.ascontiguousarray(img)\n\n        img = torch.from_numpy(img).to(device).float()\n       
 img /= 255.0  # scale (0, 255) to (0, 1)\n        img = img.unsqueeze(0)  # add batch dimension\n\n        t1 = torch_utils.time_synchronized()\n        pred = model(img)[0]  # only get inference result\n        t2 = torch_utils.time_synchronized()\n        print(t2 - t1)\n\n        pred = utils.non_max_suppression(pred, conf_thres=0.1, iou_thres=0.6, multi_label=True)[0]\n        t3 = time.time()\n        print(t3 - t2)\n\n        if pred is None:\n            print(\"No target detected.\")\n            exit(0)\n\n        # process detections\n        pred[:, :4] = utils.scale_coords(img.shape[2:], pred[:, :4], img_o.shape).round()\n        print(pred.shape)\n\n        bboxes = pred[:, :4].detach().cpu().numpy()\n        scores = pred[:, 4].detach().cpu().numpy()\n        classes = pred[:, 5].detach().cpu().numpy().astype(np.int) + 1\n\n        pil_img = Image.fromarray(img_o[:, :, ::-1])\n        plot_img = draw_objs(pil_img,\n                             bboxes,\n                             classes,\n                             scores,\n                             category_index=category_index,\n                             box_thresh=0.2,\n                             line_thickness=3,\n                             font='arial.ttf',\n                             font_size=20)\n        plt.imshow(plot_img)\n        plt.show()\n        # 保存预测的图片结果\n        plot_img.save(\"test_result.jpg\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/requirements.txt",
    "content": "numpy\nopencv_python==4.3.0.36\nlxml\ntorch==1.7.1\ntorchvision==0.8.2\nscipy\npycocotools\nmatplotlib\ntqdm\ntensorboard==2.1.0\nPyYAML\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/results20210515-152935.txt",
    "content": "epoch:0 0.2934  0.6118  0.2275  0.0649  0.2581  0.3549  0.2737  0.4685  0.4842  0.1264  0.4246  0.5404  10.2811  0.001\nepoch:1 0.4889  0.7742  0.5507  0.1366  0.3838  0.577  0.4132  0.6066  0.6205  0.2383  0.5264  0.6874  6.6461  0.000997\nepoch:2 0.4981  0.7902  0.5599  0.1587  0.3863  0.5807  0.4162  0.6044  0.618  0.2694  0.5156  0.6828  6.0806  0.000989\nepoch:3 0.4907  0.7876  0.546  0.1648  0.3848  0.5721  0.4133  0.6051  0.62  0.2874  0.5287  0.6791  5.8333  0.000976\nepoch:4 0.5099  0.7901  0.5811  0.1726  0.4117  0.59  0.4224  0.6193  0.6344  0.2929  0.5474  0.6911  5.6126  0.00957\nepoch:5 0.5218  0.8066  0.5912  0.178  0.4189  0.5916  0.4292  0.6251  0.6396  0.2879  0.5427  0.6997  5.6512  0.000934\nepoch:6 0.5187  0.8009  0.5893  0.1794  0.4257  0.5925  0.422  0.6202  0.6359  0.3063  0.5635  0.6893  5.4484  0.000905\nepoch:7 0.5336  0.8059  0.6076  0.1823  0.429  0.6138  0.4379  0.6346  0.6495  0.3054  0.5545  0.711  5.3175  0.000873\nepoch:8 0.5498  0.8066  0.6218  0.1735  0.4369  0.6336  0.4456  0.6476  0.6626  0.3079  0.5653  0.7285  5.307  0.000836\nepoch:9 0.5445  0.8057  0.6274  0.1825  0.445  0.6269  0.438  0.6411  0.6557  0.3076  0.5747  0.7181  5.148  0.000796\nepoch:10 0.532  0.798  0.6059  0.1833  0.4272  0.6159  0.4395  0.6376  0.6512  0.3086  0.5537  0.7181  5.0508  0.000752\nepoch:11 0.5574  0.8063  0.6272  0.1873  0.44  0.6416  0.4524  0.6543  0.6682  0.3139  0.5658  0.7358  5.1974  0.000706\nepoch:12 0.5675  0.8088  0.6422  0.1985  0.4522  0.6542  0.4584  0.6609  0.6734  0.3248  0.5752  0.7415  4.9259  0.000658\nepoch:13 0.5553  0.8114  0.6323  0.1854  0.439  0.6358  0.4466  0.6481  0.662  0.3285  0.5664  0.7247  4.7405  0.000608\nepoch:14 0.5663  0.8106  0.6375  0.1873  0.4472  0.6588  0.4565  0.6612  0.6745  0.307  0.5721  0.7462  4.9034  0.000557\nepoch:15 0.5627  0.8094  0.6354  0.1939  0.4462  0.6529  0.4526  0.6569  0.6703  0.3335  0.5734  0.7374  4.9803  0.000505\nepoch:16 0.5677  0.8085  0.6402  0.1973  0.4517  
0.6551  0.4573  0.6629  0.6762  0.3204  0.5772  0.7464  4.6182  0.000453\nepoch:17 0.569  0.8107  0.6387  0.1954  0.4483  0.6604  0.46  0.6666  0.6802  0.3323  0.5761  0.7497  4.7454  0.000402\nepoch:18 0.5783  0.8097  0.646  0.201  0.4564  0.669  0.4661  0.6719  0.6847  0.3265  0.5749  0.7567  4.4123  0.000352\nepoch:19 0.5808  0.8111  0.642  0.188  0.4568  0.6721  0.4654  0.6734  0.6866  0.3172  0.5771  0.7591  4.5915  0.000304\nepoch:20 0.5774  0.8078  0.6411  0.1916  0.4519  0.6753  0.4669  0.6745  0.6881  0.3247  0.5772  0.7601  4.6747  0.000258\nepoch:21 0.5879  0.8127  0.6522  0.197  0.4559  0.683  0.4716  0.6801  0.6934  0.3189  0.5778  0.7702  4.485  0.000214\nepoch:22 0.5858  0.8074  0.6515  0.2126  0.4622  0.6786  0.4695  0.677  0.6904  0.3427  0.5814  0.7609  4.5962  0.000174\nepoch:23 0.5893  0.8127  0.6501  0.2056  0.4596  0.6851  0.4729  0.682  0.6954  0.3345  0.583  0.7703  4.4483  0.000137\nepoch:24 0.5902  0.8123  0.654  0.197  0.4601  0.6867  0.4738  0.683  0.6969  0.3279  0.5835  0.773  4.4526  0.000105\nepoch:25 0.5948  0.816  0.6585  0.2031  0.4691  0.6888  0.4766  0.6864  0.7002  0.3379  0.5899  0.7736  4.4878  0.000076\nepoch:26 0.5921  0.8136  0.6555  0.2036  0.4706  0.6847  0.4746  0.6841  0.6975  0.3446  0.5915  0.7681  4.5259  0.000053\nepoch:27 0.5896  0.8089  0.6511  0.204  0.4666  0.6839  0.4734  0.682  0.6949  0.3422  0.5852  0.7669  4.3678  0.000034\nepoch:28 0.5956  0.8149  0.6579  0.2089  0.4683  0.6893  0.4768  0.6868  0.7008  0.3448  0.5908  0.7741  4.5182  0.000021\nepoch:29 0.5907  0.8097  0.6508  0.2078  0.4701  0.6831  0.4726  0.682  0.695  0.3476  0.5906  0.7645  4.2529  0.000013\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train.py",
    "content": "import datetime\nimport argparse\n\nimport yaml\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\n\nfrom models import *\nfrom build_utils.datasets import *\nfrom build_utils.utils import *\nfrom train_utils import train_eval_utils as train_util\nfrom train_utils import get_coco_api_from_dataset\n\n\ndef train(hyp):\n    device = torch.device(opt.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    wdir = \"weights\" + os.sep  # weights dir\n    best = wdir + \"best.pt\"\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    cfg = opt.cfg\n    data = opt.data\n    epochs = opt.epochs\n    batch_size = opt.batch_size\n    accumulate = max(round(64 / batch_size), 1)  # accumulate n times before optimizer update (bs 64)\n    weights = opt.weights  # initial training weights\n    imgsz_train = opt.img_size\n    imgsz_test = opt.img_size  # test image sizes\n    multi_scale = opt.multi_scale\n\n    # Image sizes\n    # 图像要设置成32的倍数\n    gs = 32  # (pixels) grid size\n    assert math.fmod(imgsz_test, gs) == 0, \"--img-size %g must be a %g-multiple\" % (imgsz_test, gs)\n    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs\n    if multi_scale:\n        imgsz_min = opt.img_size // 1.5\n        imgsz_max = opt.img_size // 0.667\n\n        # 将给定的最大，最小输入尺寸向下调整到32的整数倍\n        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs\n        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)\n        imgsz_train = imgsz_max  # initialize with max size\n        print(\"Using multi_scale training, image range[{}, {}]\".format(imgsz_min, imgsz_max))\n\n    # configure run\n    # init_seeds()  # 初始化随机种子，保证结果可复现\n    data_dict = parse_data_cfg(data)\n    train_path = data_dict[\"train\"]\n    test_path = data_dict[\"valid\"]\n    nc = 1 if opt.single_cls else 
int(data_dict[\"classes\"])  # number of classes\n    hyp[\"cls\"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset\n    hyp[\"obj\"] *= imgsz_test / 320\n\n    # Remove previous results\n    for f in glob.glob(results_file):\n        os.remove(f)\n\n    # Initialize model\n    model = Darknet(cfg).to(device)\n\n    # 是否冻结权重，只训练predictor的权重\n    if opt.freeze_layers:\n        # 索引减一对应的是predictor的索引，YOLOLayer并不是predictor\n        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if\n                                isinstance(module, YOLOLayer)]\n        # 冻结除predictor和YOLOLayer外的所有层\n        freeze_layer_indeces = [x for x in range(len(model.module_list)) if\n                                (x not in output_layer_indices) and\n                                (x - 1 not in output_layer_indices)]\n        # Freeze non-output layers\n        # 总共训练3x2=6个parameters\n        for idx in freeze_layer_indeces:\n            for parameter in model.module_list[idx].parameters():\n                parameter.requires_grad_(False)\n    else:\n        # 如果freeze_layer为False，默认仅训练除darknet53之后的部分\n        # 若要训练全部权重，删除以下代码\n        darknet_end_layer = 74  # only yolov3spp cfg\n        # Freeze darknet53 layers\n        # 总共训练21x3+3x2=69个parameters\n        for idx in range(darknet_end_layer + 1):  # [0, 74]\n            for parameter in model.module_list[idx].parameters():\n                parameter.requires_grad_(False)\n\n    # optimizer\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=hyp[\"lr0\"], momentum=hyp[\"momentum\"],\n                          weight_decay=hyp[\"weight_decay\"], nesterov=True)\n\n    scaler = torch.cuda.amp.GradScaler() if opt.amp else None\n\n    start_epoch = 0\n    best_map = 0.0\n    if weights.endswith(\".pt\") or weights.endswith(\".pth\"):\n        ckpt = torch.load(weights, map_location=device)\n\n        # load model\n        try:\n            ckpt[\"model\"] 
= {k: v for k, v in ckpt[\"model\"].items() if model.state_dict()[k].numel() == v.numel()}\n            model.load_state_dict(ckpt[\"model\"], strict=False)\n        except KeyError as e:\n            s = \"%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. \" \\\n                \"See https://github.com/ultralytics/yolov3/issues/657\" % (opt.weights, opt.cfg, opt.weights)\n            raise KeyError(s) from e\n\n        # load optimizer\n        if ckpt[\"optimizer\"] is not None:\n            optimizer.load_state_dict(ckpt[\"optimizer\"])\n            if \"best_map\" in ckpt.keys():\n                best_map = ckpt[\"best_map\"]\n\n        # load results\n        if ckpt.get(\"training_results\") is not None:\n            with open(results_file, \"w\") as file:\n                file.write(ckpt[\"training_results\"])  # write results.txt\n\n        # epochs\n        start_epoch = ckpt[\"epoch\"] + 1\n        if epochs < start_epoch:\n            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
%\n                  (opt.weights, ckpt['epoch'], epochs))\n            epochs += ckpt['epoch']  # finetune additional epochs\n\n        if opt.amp and \"scaler\" in ckpt:\n            scaler.load_state_dict(ckpt[\"scaler\"])\n\n        del ckpt\n\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[\"lrf\"]) + hyp[\"lrf\"]  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n    scheduler.last_epoch = start_epoch  # 指定从哪个epoch开始\n\n    # Plot lr schedule\n    # y = []\n    # for _ in range(epochs):\n    #     scheduler.step()\n    #     y.append(optimizer.param_groups[0]['lr'])\n    # plt.plot(y, '.-', label='LambdaLR')\n    # plt.xlabel('epoch')\n    # plt.ylabel('LR')\n    # plt.tight_layout()\n    # plt.savefig('LR.png', dpi=300)\n\n    # model.yolo_layers = model.module.yolo_layers\n\n    # dataset\n    # 训练集的图像尺寸指定为multi_scale_range中最大的尺寸\n    train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size,\n                                        augment=True,\n                                        hyp=hyp,  # augmentation hyperparameters\n                                        rect=opt.rect,  # rectangular training\n                                        cache_images=opt.cache_images,\n                                        single_cls=opt.single_cls)\n\n    # 验证集的图像尺寸指定为img_size(512)\n    val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size,\n                                      hyp=hyp,\n                                      rect=True,  # 将每个batch的图像调整到合适大小，可减少运算量(并不是512x512标准尺寸)\n                                      cache_images=opt.cache_images,\n                                      single_cls=opt.single_cls)\n\n    # dataloader\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    train_dataloader = torch.utils.data.DataLoader(train_dataset,\n                                           
        batch_size=batch_size,\n                                                   num_workers=nw,\n                                                   # Shuffle=True unless rectangular training is used\n                                                   shuffle=not opt.rect,\n                                                   pin_memory=True,\n                                                   collate_fn=train_dataset.collate_fn)\n\n    val_datasetloader = torch.utils.data.DataLoader(val_dataset,\n                                                    batch_size=batch_size,\n                                                    num_workers=nw,\n                                                    pin_memory=True,\n                                                    collate_fn=val_dataset.collate_fn)\n\n    # Model parameters\n    model.nc = nc  # attach number of classes to model\n    model.hyp = hyp  # attach hyperparameters to model\n    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)\n    # 计算每个类别的目标个数，并计算每个类别的比重\n    # model.class_weights = labels_to_class_weights(train_dataset.labels, nc).to(device)  # attach class weights\n\n    # start training\n    # caching val_data when you have plenty of memory(RAM)\n    # coco = None\n    coco = get_coco_api_from_dataset(val_dataset)\n\n    print(\"starting traning for %g epochs...\" % epochs)\n    print('Using %g dataloader workers' % nw)\n    for epoch in range(start_epoch, epochs):\n        mloss, lr = train_util.train_one_epoch(model, optimizer, train_dataloader,\n                                               device, epoch,\n                                               accumulate=accumulate,  # 迭代多少batch才训练完64张图片\n                                               img_size=imgsz_train,  # 输入图像的大小\n                                               multi_scale=multi_scale,\n                                               grid_min=grid_min,  # grid的最小尺寸\n                                               grid_max=grid_max, 
 # grid的最大尺寸\n                                               gs=gs,  # grid step: 32\n                                               print_freq=50,  # 每训练多少个step打印一次信息\n                                               warmup=True,\n                                               scaler=scaler)\n        # update scheduler\n        scheduler.step()\n\n        if opt.notest is False or epoch == epochs - 1:\n            # evaluate on the test dataset\n            result_info = train_util.evaluate(model, val_datasetloader,\n                                              coco=coco, device=device)\n\n            coco_mAP = result_info[0]\n            voc_mAP = result_info[1]\n            coco_mAR = result_info[8]\n\n            # write into tensorboard\n            if tb_writer:\n                tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', \"learning_rate\",\n                        \"mAP@[IoU=0.50:0.95]\", \"mAP@[IoU=0.5]\", \"mAR@[IoU=0.50:0.95]\"]\n\n                for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):\n                    tb_writer.add_scalar(tag, x, epoch)\n\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录coco的12个指标加上训练总损失和lr\n                result_info = [str(round(i, 4)) for i in result_info + [mloss.tolist()[-1]]] + [str(round(lr, 6))]\n                txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                f.write(txt + \"\\n\")\n\n            # update best mAP(IoU=0.50:0.95)\n            if coco_mAP > best_map:\n                best_map = coco_mAP\n\n            if opt.savebest is False:\n                # save weights every epoch\n                with open(results_file, 'r') as f:\n                    save_files = {\n                        'model': model.state_dict(),\n                        'optimizer': optimizer.state_dict(),\n                        'training_results': f.read(),\n                        'epoch': 
epoch,\n                        'best_map': best_map}\n                    if opt.amp:\n                        save_files[\"scaler\"] = scaler.state_dict()\n                    torch.save(save_files, \"./weights/yolov3spp-{}.pt\".format(epoch))\n            else:\n                # only save best weights\n                if best_map == coco_mAP:\n                    with open(results_file, 'r') as f:\n                        save_files = {\n                            'model': model.state_dict(),\n                            'optimizer': optimizer.state_dict(),\n                            'training_results': f.read(),\n                            'epoch': epoch,\n                            'best_map': best_map}\n                        if opt.amp:\n                            save_files[\"scaler\"] = scaler.state_dict()\n                        torch.save(save_files, best.format(epoch))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=4)\n    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help=\"*.cfg path\")\n    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')\n    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')\n    parser.add_argument('--multi-scale', type=bool, default=True,\n                        help='adjust (67%% - 150%%) img_size every 10 batches')\n    parser.add_argument('--img-size', type=int, default=512, help='test size')\n    parser.add_argument('--rect', action='store_true', help='rectangular training')\n    parser.add_argument('--savebest', type=bool, default=False, help='only save best checkpoint')\n    parser.add_argument('--notest', action='store_true', help='only test final epoch')\n    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')\n    
parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics-512.pt',\n                        help='initial weights path')\n    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')\n    parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')\n    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')\n    parser.add_argument('--freeze-layers', type=bool, default=False, help='Freeze non-output layers')\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n    opt = parser.parse_args()\n\n    # 检查文件是否存在\n    opt.cfg = check_file(opt.cfg)\n    opt.data = check_file(opt.data)\n    opt.hyp = check_file(opt.hyp)\n    print(opt)\n\n    with open(opt.hyp) as f:\n        hyp = yaml.load(f, Loader=yaml.FullLoader)\n\n    print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n    tb_writer = SummaryWriter(comment=opt.name)\n    train(hyp)\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_multi_GPU.py",
    "content": "import argparse\nimport datetime\nimport pickle\n\nimport yaml\nimport torch.optim as optim\nimport torch.optim.lr_scheduler as lr_scheduler\nfrom torch.utils.tensorboard import SummaryWriter\n\n\nfrom models import *\nfrom build_utils.datasets import *\nfrom build_utils.utils import *\nfrom train_utils import train_eval_utils as train_util\nfrom train_utils import get_coco_api_from_dataset, init_distributed_mode, torch_distributed_zero_first\n\n\ndef main(opt, hyp):\n    # 初始化各进程\n    init_distributed_mode(opt)\n\n    if opt.rank in [-1, 0]:\n        print(opt)\n        print('Start Tensorboard with \"tensorboard --logdir=runs\", view at http://localhost:6006/')\n        tb_writer = SummaryWriter(comment=opt.name)\n\n    device = torch.device(opt.device)\n    if \"cuda\" not in device.type:\n        raise EnvironmentError(\"not find GPU device for training.\")\n\n    # 使用DDP后会对每个device上的gradients取均值，所以需要放大学习率\n    hyp[\"lr0\"] *= max(1., opt.world_size * opt.batch_size / 64)\n\n    wdir = \"weights\" + os.sep  # weights dir\n    best = wdir + \"best.pt\"\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    cfg = opt.cfg\n    data = opt.data\n    epochs = opt.epochs\n    batch_size = opt.batch_size\n    # accumulate n times before optimizer update (bs 64)\n    accumulate = max(round(64 / (opt.world_size * opt.batch_size)), 1)\n    weights = opt.weights  # initial training weights\n    imgsz_train = opt.img_size\n    imgsz_test = opt.img_size  # test image sizes\n    multi_scale = opt.multi_scale\n\n    # Image sizes\n    # 图像要设置成32的倍数\n    gs = 32  # (pixels) grid size\n    assert math.fmod(imgsz_test, gs) == 0, \"--img-size %g must be a %g-multiple\" % (imgsz_test, gs)\n    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs\n    if multi_scale:\n        imgsz_min = opt.img_size // 1.5\n        imgsz_max = opt.img_size // 0.667\n\n        # 将给定的最大，最小输入尺寸向下调整到32的整数倍\n        grid_min, grid_max 
= imgsz_min // gs, imgsz_max // gs\n        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)\n        imgsz_train = imgsz_max  # initialize with max size\n        if opt.rank in [-1, 0]:  # 只在第一个进程中显示打印信息\n            print(\"Using multi_scale training, image range[{}, {}]\".format(imgsz_min, imgsz_max))\n\n    # configure run\n    random.seed(0)  # 设置随机种子\n    data_dict = parse_data_cfg(data)\n    train_path = data_dict[\"train\"]\n    test_path = data_dict[\"valid\"]\n    nc = 1 if opt.single_cls else int(data_dict[\"classes\"])  # number of classes\n    hyp[\"cls\"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset\n    hyp[\"obj\"] *= imgsz_test / 320\n\n    if opt.rank in [-1, 0]:\n        # Remove previous results\n        for f in glob.glob(results_file) + glob.glob(\"tmp.pk\"):\n            os.remove(f)\n\n    # Initialize model\n    model = Darknet(cfg).to(device)\n\n    start_epoch = 0\n    best_map = 0.0\n    # 如果指定了预训练权重，则载入预训练权重\n    if weights.endswith(\".pt\"):\n        ckpt = torch.load(weights, map_location=device)\n\n        # load model\n        try:\n            ckpt[\"model\"] = {k: v for k, v in ckpt[\"model\"].items()\n                             if model.state_dict()[k].numel() == v.numel()}\n            model.load_state_dict(ckpt[\"model\"], strict=False)\n        except KeyError as e:\n            s = \"%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. 
\" \\\n                \"See https://github.com/ultralytics/yolov3/issues/657\" % (opt.weights, opt.cfg, opt.weights)\n            raise KeyError(s) from e\n\n        if opt.rank in [-1, 0]:\n            # load results\n            if ckpt.get(\"training_results\") is not None:\n                with open(results_file, \"w\") as file:\n                    file.write(ckpt[\"training_results\"])  # write results.txt\n\n        # epochs\n        start_epoch = ckpt[\"epoch\"] + 1\n        if epochs < start_epoch:\n            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %\n                  (opt.weights, ckpt['epoch'], epochs))\n            epochs += ckpt['epoch']  # finetune additional epochs\n\n        if opt.amp and \"scaler\" in ckpt:\n            scaler.load_state_dict(ckpt[\"scaler\"])\n\n        del ckpt\n\n    # 是否冻结权重，只训练predictor的权重\n    if opt.freeze_layers:\n        # 索引减一对应的是predictor的索引，YOLOLayer并不是predictor\n        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if\n                                isinstance(module, YOLOLayer)]\n        # 冻结除predictor和YOLOLayer外的所有层\n        freeze_layer_indeces = [x for x in range(len(model.module_list)) if\n                                (x not in output_layer_indices) and\n                                (x - 1 not in output_layer_indices)]\n        # Freeze non-output layers\n        # 总共训练3x2=6个parameters\n        for idx in freeze_layer_indeces:\n            for parameter in model.module_list[idx].parameters():\n                parameter.requires_grad_(False)\n    else:\n        # 如果freeze_layer为False，默认仅训练除darknet53之后的部分\n        # 若要训练全部权重，删除以下代码\n        darknet_end_layer = 74  # only yolov3spp cfg\n        # Freeze darknet53 layers\n        # 总共训练21x3+3x2=69个parameters\n        for idx in range(darknet_end_layer + 1):  # [0, 74]\n            for parameter in model.module_list[idx].parameters():\n                
parameter.requires_grad_(False)\n\n    # SyncBatchNorm\n    # 如果只训练最后的predictor(其中不含bn层)，SyncBatchNorm没有作用\n    if opt.freeze_layers is False:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)\n\n    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu])\n    model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level\n\n    # optimizer\n    pg = [p for p in model.parameters() if p.requires_grad]\n    optimizer = optim.SGD(pg, lr=hyp[\"lr0\"], momentum=hyp[\"momentum\"],\n                          weight_decay=hyp[\"weight_decay\"], nesterov=True)\n\n    scaler = torch.cuda.amp.GradScaler() if opt.amp else None\n\n    # Scheduler https://arxiv.org/pdf/1812.01187.pdf\n    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[\"lrf\"]) + hyp[\"lrf\"]  # cosine\n    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)\n    scheduler.last_epoch = start_epoch  # 指定从哪个epoch开始\n\n    # dataset\n    # 训练集的图像尺寸指定为multi_scale_range中最大的尺寸\n    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache.\n    with torch_distributed_zero_first(opt.rank):\n        train_dataset = LoadImagesAndLabels(train_path, imgsz_train, batch_size,\n                                            augment=True,\n                                            hyp=hyp,  # augmentation hyperparameters\n                                            rect=opt.rect,  # rectangular training\n                                            cache_images=opt.cache_images,\n                                            single_cls=opt.single_cls,\n                                            rank=opt.rank)\n        # 验证集的图像尺寸指定为img_size(512)\n        val_dataset = LoadImagesAndLabels(test_path, imgsz_test, batch_size,\n                                          hyp=hyp,\n                                          cache_images=opt.cache_images,\n                             
             single_cls=opt.single_cls,\n                                          rank=opt.rank)\n\n    # 给每个rank对应的进程分配训练的样本索引\n    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    # 将样本索引每batch_size个元素组成一个list\n    train_batch_sampler = torch.utils.data.BatchSampler(\n        train_sampler, batch_size, drop_last=True)\n\n    # dataloader\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    if opt.rank in [-1, 0]:\n        print('Using %g dataloader workers' % nw)\n    train_data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_sampler=train_batch_sampler, num_workers=nw,\n        pin_memory=True, collate_fn=train_dataset.collate_fn)\n\n    val_data_loader = torch.utils.data.DataLoader(\n        val_dataset, batch_size=batch_size,\n        sampler=val_sampler, num_workers=nw,\n        pin_memory=True, collate_fn=val_dataset.collate_fn)\n\n    # Model parameters\n    model.nc = nc  # attach number of classes to model\n    model.hyp = hyp  # attach hyperparameters to model\n    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)\n\n    # start training\n    # caching val_data when you have plenty of memory(RAM)\n    with torch_distributed_zero_first(opt.rank):\n        if os.path.exists(\"tmp.pk\") is False:\n            coco = get_coco_api_from_dataset(val_dataset)\n            with open(\"tmp.pk\", \"wb\") as f:\n                pickle.dump(coco, f)\n        else:\n            with open(\"tmp.pk\", \"rb\") as f:\n                coco = pickle.load(f)\n\n    if opt.rank in [-1, 0]:\n        print(\"starting training for %g epochs...\" % epochs)\n        print('Using %g dataloader workers' % nw)\n\n    start_time = time.time()\n    for epoch in range(start_epoch, epochs):\n        train_sampler.set_epoch(epoch)\n        mloss, lr = train_util.train_one_epoch(model, optimizer, 
train_data_loader,\n                                               device, epoch,\n                                               accumulate=accumulate,  # 迭代多少batch才训练完64张图片\n                                               img_size=imgsz_train,  # 输入图像的大小\n                                               multi_scale=multi_scale,\n                                               grid_min=grid_min,  # grid的最小尺寸\n                                               grid_max=grid_max,  # grid的最大尺寸\n                                               gs=gs,  # grid step: 32\n                                               print_freq=50,  # 每训练多少个step打印一次信息\n                                               warmup=True,\n                                               scaler=scaler)\n        # update scheduler\n        scheduler.step()\n\n        if opt.notest is False or epoch == epochs - 1:\n            # evaluate on the test dataset\n            result_info = train_util.evaluate(model, val_data_loader,\n                                              coco=coco, device=device)\n\n            # only first process in DDP process to record info and save weights\n            if opt.rank in [-1, 0]:\n                coco_mAP = result_info[0]\n                voc_mAP = result_info[1]\n                coco_mAR = result_info[8]\n\n                # write into tensorboard\n                if tb_writer:\n                    tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss', \"learning_rate\",\n                            \"mAP@[IoU=0.50:0.95]\", \"mAP@[IoU=0.5]\", \"mAR@[IoU=0.50:0.95]\"]\n\n                    for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):\n                        tb_writer.add_scalar(tag, x, epoch)\n\n                # write into txt\n                with open(results_file, \"a\") as f:\n                    # 记录coco的12个指标加上训练总损失和lr\n                    result_info = [str(round(i, 4)) for i in result_info + 
[mloss.tolist()[-1]]] + [str(round(lr, 6))]\n                    txt = \"epoch:{} {}\".format(epoch, '  '.join(result_info))\n                    f.write(txt + \"\\n\")\n\n                # update best mAP(IoU=0.50:0.95)\n                if coco_mAP > best_map:\n                    best_map = coco_mAP\n\n                if opt.savebest is False:\n                    # save weights every epoch\n                    with open(results_file, 'r') as f:\n                        save_files = {\n                            'model': model.module.state_dict(),\n                            'optimizer': optimizer.state_dict(),\n                            'training_results': f.read(),\n                            'epoch': epoch,\n                            'best_map': best_map}\n                        if opt.amp:\n                            save_files[\"scaler\"] = scaler.state_dict()\n                        torch.save(save_files, \"./weights/yolov3spp-{}.pt\".format(epoch))\n                else:\n                    # only save best weights\n                    if best_map == coco_mAP:\n                        with open(results_file, 'r') as f:\n                            save_files = {\n                                'model': model.module.state_dict(),\n                                'optimizer': optimizer.state_dict(),\n                                'training_results': f.read(),\n                                'epoch': epoch,\n                                'best_map': best_map}\n                            if opt.amp:\n                                save_files[\"scaler\"] = scaler.state_dict()\n                            torch.save(save_files, best.format(epoch))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    if opt.rank in [-1, 0]:\n        print('Training time {}'.format(total_time_str))\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    
parser.add_argument('--epochs', type=int, default=30)\n    parser.add_argument('--batch-size', type=int, default=16)\n    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help=\"*.cfg path\")\n    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')\n    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')\n    parser.add_argument('--multi-scale', type=bool, default=True,\n                        help='adjust (67%% - 150%%) img_size every 10 batches')\n    parser.add_argument('--img-size', type=int, default=512, help='test size')\n    parser.add_argument('--rect', action='store_true', help='rectangular training')\n    parser.add_argument('--savebest', type=bool, default=False, help='only save best checkpoint')\n    parser.add_argument('--notest', action='store_true', help='only test final epoch')\n    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')\n    parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics-512.pt',\n                        help='initial weights path')\n    parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')\n    parser.add_argument('--device', default='cuda', help='device id (i.e. 
0 or 0,1 or cpu)')\n    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')\n    parser.add_argument('--freeze-layers', type=bool, default=False, help='Freeze non-output layers')\n    # 开启的进程数(注意不是线程),不用设置该参数，会根据nproc_per_node自动设置\n    parser.add_argument('--world-size', default=4, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # 是否使用混合精度训练(需要GPU支持混合精度)\n    parser.add_argument(\"--amp\", default=False, help=\"Use torch.cuda.amp for mixed precision training\")\n\n    opt = parser.parse_args()\n\n    # 检查文件是否存在\n    opt.cfg = check_file(opt.cfg)\n    opt.data = check_file(opt.data)\n    opt.hyp = check_file(opt.hyp)\n\n    with open(opt.hyp) as f:\n        hyp = yaml.load(f, Loader=yaml.FullLoader)\n\n    main(opt, hyp)\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/__init__.py",
    "content": "from .coco_utils import get_coco_api_from_dataset\nfrom .coco_eval import CocoEvaluator\nfrom .distributed_utils import init_distributed_mode, torch_distributed_zero_first\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/coco_eval.py",
    "content": "import json\nimport copy\nfrom collections import defaultdict\n\nimport numpy as np\nimport torch\nimport torch._six\nfrom pycocotools.cocoeval import COCOeval\nfrom pycocotools.coco import COCO\nimport pycocotools.mask as mask_util\n\nfrom .distributed_utils import all_gather\n\n\nclass CocoEvaluator(object):\n    def __init__(self, coco_gt, iou_types):\n        assert isinstance(iou_types, (list, tuple))\n        coco_gt = copy.deepcopy(coco_gt)\n        self.coco_gt = coco_gt\n\n        self.iou_types = iou_types\n        self.coco_eval = {}\n        for iou_type in iou_types:\n            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)\n\n        self.img_ids = []\n        self.eval_imgs = {k: [] for k in iou_types}\n\n    def update(self, predictions):\n        img_ids = list(np.unique(list(predictions.keys())))\n        self.img_ids.extend(img_ids)\n\n        for iou_type in self.iou_types:\n            results = self.prepare(predictions, iou_type)\n            coco_dt = loadRes(self.coco_gt, results) if results else COCO()\n            coco_eval = self.coco_eval[iou_type]\n\n            coco_eval.cocoDt = coco_dt\n            coco_eval.params.imgIds = list(img_ids)\n            img_ids, eval_imgs = evaluate(coco_eval)\n\n            self.eval_imgs[iou_type].append(eval_imgs)\n\n    def synchronize_between_processes(self):\n        for iou_type in self.iou_types:\n            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)\n            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])\n\n    def accumulate(self):\n        for coco_eval in self.coco_eval.values():\n            coco_eval.accumulate()\n\n    def summarize(self):\n        for iou_type, coco_eval in self.coco_eval.items():\n            print(\"IoU metric: {}\".format(iou_type))\n            coco_eval.summarize()\n\n    def prepare(self, predictions, iou_type):\n        if iou_type == \"bbox\":\n           
 return self.prepare_for_coco_detection(predictions)\n        elif iou_type == \"segm\":\n            return self.prepare_for_coco_segmentation(predictions)\n        elif iou_type == \"keypoints\":\n            return self.prepare_for_coco_keypoint(predictions)\n        else:\n            raise ValueError(\"Unknown iou type {}\".format(iou_type))\n\n    def prepare_for_coco_detection(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"bbox\": box,\n                        \"score\": scores[k],\n                    }\n                    for k, box in enumerate(boxes)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_segmentation(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            scores = prediction[\"scores\"]\n            labels = prediction[\"labels\"]\n            masks = prediction[\"masks\"]\n\n            masks = masks > 0.5\n\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n\n            rles = [\n                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order=\"F\"))[0]\n                for mask in masks\n            ]\n            for rle in rles:\n                rle[\"counts\"] = rle[\"counts\"].decode(\"utf-8\")\n\n            coco_results.extend(\n                [\n  
                  {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        \"segmentation\": rle,\n                        \"score\": scores[k],\n                    }\n                    for k, rle in enumerate(rles)\n                ]\n            )\n        return coco_results\n\n    def prepare_for_coco_keypoint(self, predictions):\n        coco_results = []\n        for original_id, prediction in predictions.items():\n            if len(prediction) == 0:\n                continue\n\n            boxes = prediction[\"boxes\"]\n            boxes = convert_to_xywh(boxes).tolist()\n            scores = prediction[\"scores\"].tolist()\n            labels = prediction[\"labels\"].tolist()\n            keypoints = prediction[\"keypoints\"]\n            keypoints = keypoints.flatten(start_dim=1).tolist()\n\n            coco_results.extend(\n                [\n                    {\n                        \"image_id\": original_id,\n                        \"category_id\": labels[k],\n                        'keypoints': keypoint,\n                        \"score\": scores[k],\n                    }\n                    for k, keypoint in enumerate(keypoints)\n                ]\n            )\n        return coco_results\n\n\ndef convert_to_xywh(boxes):\n    xmin, ymin, xmax, ymax = boxes.unbind(1)\n    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)\n\n\ndef merge(img_ids, eval_imgs):\n    all_img_ids = all_gather(img_ids)\n    all_eval_imgs = all_gather(eval_imgs)\n\n    merged_img_ids = []\n    for p in all_img_ids:\n        merged_img_ids.extend(p)\n\n    merged_eval_imgs = []\n    for p in all_eval_imgs:\n        merged_eval_imgs.append(p)\n\n    merged_img_ids = np.array(merged_img_ids)\n    merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)\n\n    # keep only unique (and in sorted order) images\n    merged_img_ids, idx = np.unique(merged_img_ids, 
return_index=True)\n    merged_eval_imgs = merged_eval_imgs[..., idx]\n\n    return merged_img_ids, merged_eval_imgs\n\n\ndef create_common_coco_eval(coco_eval, img_ids, eval_imgs):\n    img_ids, eval_imgs = merge(img_ids, eval_imgs)\n    img_ids = list(img_ids)\n    eval_imgs = list(eval_imgs.flatten())\n\n    coco_eval.evalImgs = eval_imgs\n    coco_eval.params.imgIds = img_ids\n    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)\n\n\n#################################################################\n# From pycocotools, just removed the prints and fixed\n# a Python3 bug about unicode not defined\n#################################################################\n\n# Ideally, pycocotools wouldn't have hard-coded prints\n# so that we could avoid copy-pasting those two functions\n\ndef createIndex(self):\n    # create index\n    # print('creating index...')\n    anns, cats, imgs = {}, {}, {}\n    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)\n    if 'annotations' in self.dataset:\n        for ann in self.dataset['annotations']:\n            imgToAnns[ann['image_id']].append(ann)\n            anns[ann['id']] = ann\n\n    if 'images' in self.dataset:\n        for img in self.dataset['images']:\n            imgs[img['id']] = img\n\n    if 'categories' in self.dataset:\n        for cat in self.dataset['categories']:\n            cats[cat['id']] = cat\n\n    if 'annotations' in self.dataset and 'categories' in self.dataset:\n        for ann in self.dataset['annotations']:\n            catToImgs[ann['category_id']].append(ann['image_id'])\n\n    # print('index created!')\n\n    # create class members\n    self.anns = anns\n    self.imgToAnns = imgToAnns\n    self.catToImgs = catToImgs\n    self.imgs = imgs\n    self.cats = cats\n\n\nmaskUtils = mask_util\n\n\ndef loadRes(self, resFile):\n    \"\"\"\n    Load result file and return a result api object.\n    :param   resFile (str)     : file name of result file\n    :return: res (obj)         : result 
api object\n    \"\"\"\n    res = COCO()\n    res.dataset['images'] = [img for img in self.dataset['images']]\n\n    # print('Loading and preparing results...')\n    # tic = time.time()\n    if isinstance(resFile, torch._six.string_classes):\n        anns = json.load(open(resFile))\n    elif type(resFile) == np.ndarray:\n        anns = self.loadNumpyAnnotations(resFile)\n    else:\n        anns = resFile\n    assert type(anns) == list, 'results in not an array of objects'\n    annsImgIds = [ann['image_id'] for ann in anns]\n    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \\\n        'Results do not correspond to current coco set'\n    if 'caption' in anns[0]:\n        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])\n        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]\n        for id, ann in enumerate(anns):\n            ann['id'] = id + 1\n    elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            bb = ann['bbox']\n            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]\n            if 'segmentation' not in ann:\n                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]\n            ann['area'] = bb[2] * bb[3]\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'segmentation' in anns[0]:\n        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            # now only support compressed RLE format as segmentation results\n            ann['area'] = maskUtils.area(ann['segmentation'])\n            if 'bbox' not in ann:\n                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])\n            ann['id'] = id + 1\n            ann['iscrowd'] = 0\n    elif 'keypoints' in anns[0]:\n        
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])\n        for id, ann in enumerate(anns):\n            s = ann['keypoints']\n            x = s[0::3]\n            y = s[1::3]\n            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)\n            ann['area'] = (x2 - x1) * (y2 - y1)\n            ann['id'] = id + 1\n            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]\n    # print('DONE (t={:0.2f}s)'.format(time.time()- tic))\n\n    res.dataset['annotations'] = anns\n    createIndex(res)\n    return res\n\n\ndef evaluate(self):\n    '''\n    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs\n    :return: None\n    '''\n    # tic = time.time()\n    # print('Running per image evaluation...')\n    p = self.params\n    # add backward compatibility if useSegm is specified in params\n    if p.useSegm is not None:\n        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'\n        print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType))\n    # print('Evaluate annotation type *{}*'.format(p.iouType))\n    p.imgIds = list(np.unique(p.imgIds))\n    if p.useCats:\n        p.catIds = list(np.unique(p.catIds))\n    p.maxDets = sorted(p.maxDets)\n    self.params = p\n\n    self._prepare()\n    # loop through images, area range, max detection number\n    catIds = p.catIds if p.useCats else [-1]\n\n    if p.iouType == 'segm' or p.iouType == 'bbox':\n        computeIoU = self.computeIoU\n    elif p.iouType == 'keypoints':\n        computeIoU = self.computeOks\n    self.ious = {\n        (imgId, catId): computeIoU(imgId, catId)\n        for imgId in p.imgIds\n        for catId in catIds}\n\n    evaluateImg = self.evaluateImg\n    maxDet = p.maxDets[-1]\n    evalImgs = [\n        evaluateImg(imgId, catId, areaRng, maxDet)\n        for catId in catIds\n        for areaRng in p.areaRng\n        for imgId in p.imgIds\n    ]\n    # this is NOT in the pycocotools code, but could be done outside\n    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))\n    self._paramsEval = copy.deepcopy(self.params)\n    # toc = time.time()\n    # print('DONE (t={:0.2f}s).'.format(toc-tic))\n    return p.imgIds, evalImgs\n\n#################################################################\n# end of straight copy from pycocotools, just removing the prints\n#################################################################\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/coco_utils.py",
    "content": "from tqdm import tqdm\n\nimport torch\nimport torchvision\nimport torch.utils.data\nfrom pycocotools.coco import COCO\n\n\ndef convert_to_coco_api(ds):\n    coco_ds = COCO()\n    # annotation IDs need to start at 1, not 0\n    ann_id = 1\n    dataset = {'images': [], 'categories': [], 'annotations': []}\n    categories = set()\n    # 遍历dataset中的每张图像\n    for img_idx in tqdm(range(len(ds)), desc=\"loading eval info for coco tools.\"):\n        # find better way to get target\n        targets, shapes = ds.coco_index(img_idx)\n        # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h)\n        img_dict = {}\n        img_dict['id'] = img_idx\n        img_dict['height'] = shapes[0]\n        img_dict['width'] = shapes[1]\n        dataset['images'].append(img_dict)\n\n        for obj in targets:\n            ann = {}\n            ann[\"image_id\"] = img_idx\n            # 将相对坐标转为绝对坐标\n            # box (x, y, w, h)\n            boxes = obj[1:]\n            # (x, y, w, h) to (xmin, ymin, w, h)\n            boxes[:2] -= 0.5*boxes[2:]\n            boxes[[0, 2]] *= img_dict[\"width\"]\n            boxes[[1, 3]] *= img_dict[\"height\"]\n            boxes = boxes.tolist()\n\n            ann[\"bbox\"] = boxes\n            ann[\"category_id\"] = int(obj[0])\n            categories.add(int(obj[0]))\n            ann[\"area\"] = boxes[2] * boxes[3]\n            ann[\"iscrowd\"] = 0\n            ann[\"id\"] = ann_id\n            dataset[\"annotations\"].append(ann)\n            ann_id += 1\n\n    dataset['categories'] = [{'id': i} for i in sorted(categories)]\n    coco_ds.dataset = dataset\n    coco_ds.createIndex()\n    return coco_ds\n\n\ndef get_coco_api_from_dataset(dataset):\n    for _ in range(10):\n        if isinstance(dataset, torchvision.datasets.CocoDetection):\n            break\n        if isinstance(dataset, torch.utils.data.Subset):\n            dataset = dataset.dataset\n    if isinstance(dataset, 
torchvision.datasets.CocoDetection):\n        return dataset.coco\n    return convert_to_coco_api(dataset)\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport pickle\nimport time\nimport errno\nimport os\nfrom contextlib import contextmanager\n\nimport torch\nimport torch.distributed as dist\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)  # deque简单理解成加强版list\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device=\"cuda\")\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):  # @property 是装饰器，这里可简单理解为增加median属性(只读)\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    Run 
all_gather on arbitrary picklable data (not necessarily tensors)\n    Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()\n    if world_size == 1:\n        return [data]\n\n    # serialized to a Tensor\n    buffer = pickle.dumps(data)\n    storage = torch.ByteStorage.from_buffer(buffer)\n    tensor = torch.ByteTensor(storage).to(\"cuda\")\n\n    # obtain Tensor size of each rank\n    local_size = torch.tensor([tensor.numel()], device=\"cuda\")\n    size_list = [torch.tensor([0], device=\"cuda\") for _ in range(world_size)]\n    dist.all_gather(size_list, local_size)\n    size_list = [int(size.item()) for size in size_list]\n    max_size = max(size_list)\n\n    # receiving Tensor from all ranks\n    # we pad the tensor because torch all_gather does not support\n    # gathering tensors of different shapes\n    tensor_list = []\n    for _ in size_list:\n        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=\"cuda\"))\n    if local_size != max_size:\n        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device=\"cuda\")\n        tensor = torch.cat((tensor, padding), dim=0)\n    dist.all_gather(tensor_list, tensor)\n\n    data_list = []\n    for size, tensor in zip(size_list, tensor_list):\n        buffer = tensor.cpu().numpy().tobytes()[:size]\n        data_list.append(pickle.loads(buffer))\n\n    return data_list\n\n\ndef reduce_dict(input_dict, average=True):\n    \"\"\"\n    Args:\n        input_dict (dict): all the values will be reduced\n        average (bool): whether to do average or sum\n    Reduce the values in the dictionary from all processes so that all processes\n    have the averaged results. 
Returns a dict with the same fields as\n    input_dict, after reduction.\n    \"\"\"\n    world_size = get_world_size()\n    if world_size < 2:  # 单GPU的情况\n        return input_dict\n    with torch.no_grad():  # 多GPU的情况\n        names = []\n        values = []\n        # sort the keys so that they are consistent across processes\n        for k in sorted(input_dict.keys()):\n            names.append(k)\n            values.append(input_dict[k])\n        values = torch.stack(values, dim=0)\n        dist.all_reduce(values)\n        if average:\n            values /= world_size\n\n        reduced_dict = {k: v for k, v in zip(names, values)}\n        return reduced_dict\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = \"\"\n        start_time = time.time()\n       
 end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = \":\" + str(len(str(len(iterable)))) + \"d\"\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}',\n                                           'max mem: {memory:.0f}'])\n        else:\n            log_msg = self.delimiter.join([header,\n                                           '[{0' + space_fmt + '}/{1}]',\n                                           'eta: {eta}',\n                                           '{meters}',\n                                           'time: {time}',\n                                           'data: {data}'])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0 or i == len(iterable) - 1:\n                eta_second = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=eta_second))\n                if torch.cuda.is_available():\n                    print(log_msg.format(i, len(iterable),\n                                         eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time),\n                                         memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(i, len(iterable),\n                                         
eta=eta_string,\n                                         meters=str(self),\n                                         time=str(iter_time),\n                                         data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {} ({:.4f} s / it)'.format(header,\n                                                         total_time_str,\n\n                                                         total_time / len(iterable)))\n\n\ndef warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):\n\n    def f(x):\n        \"\"\"根据step数返回一个学习率倍率因子\"\"\"\n        if x >= warmup_iters:  # 当迭代数大于给定的warmup_iters时，倍率因子为1\n            return 1\n        alpha = float(x) / warmup_iters\n        # 迭代过程中倍率因子从warmup_factor -> 1\n        return warmup_factor * (1 - alpha) + alpha\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    \"\"\"检查是否支持分布式环境\"\"\"\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef 
is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    torch.distributed.barrier()\n    setup_for_distributed(args.rank == 0)\n\n\n@contextmanager\ndef torch_distributed_zero_first(local_rank: int):\n    \"\"\"\n    Context manager to make all processes in distributed training wait for each local_master to do something.\n    \"\"\"\n    if local_rank not in [-1, 0]:\n        torch.distributed.barrier()\n    yield\n    if local_rank == 0:\n        torch.distributed.barrier()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/group_by_aspect_ratio.py",
    "content": "import bisect\nfrom collections import defaultdict\nimport copy\nfrom itertools import repeat, chain\nimport math\nimport numpy as np\n\nimport torch\nimport torch.utils.data\nfrom torch.utils.data.sampler import BatchSampler, Sampler\nfrom torch.utils.model_zoo import tqdm\nimport torchvision\n\nfrom PIL import Image\n\n\ndef _repeat_to_at_least(iterable, n):\n    repeat_times = math.ceil(n / len(iterable))\n    repeated = chain.from_iterable(repeat(iterable, repeat_times))\n    return list(repeated)\n\n\nclass GroupedBatchSampler(BatchSampler):\n    \"\"\"\n    Wraps another sampler to yield a mini-batch of indices.\n    It enforces that the batch only contain elements from the same group.\n    It also tries to provide mini-batches which follows an ordering which is\n    as close as possible to the ordering from the original sampler.\n    Arguments:\n        sampler (Sampler): Base sampler.\n        group_ids (list[int]): If the sampler produces indices in range [0, N),\n            `group_ids` must be a list of `N` ints which contains the group id of each sample.\n            The group ids must be a continuous set of integers starting from\n            0, i.e. 
they must be in the range [0, num_groups).\n        batch_size (int): Size of mini-batch.\n    \"\"\"\n    def __init__(self, sampler, group_ids, batch_size):\n        if not isinstance(sampler, Sampler):\n            raise ValueError(\n                \"sampler should be an instance of \"\n                \"torch.utils.data.Sampler, but got sampler={}\".format(sampler)\n            )\n        self.sampler = sampler\n        self.group_ids = group_ids\n        self.batch_size = batch_size\n\n    def __iter__(self):\n        buffer_per_group = defaultdict(list)\n        samples_per_group = defaultdict(list)\n\n        num_batches = 0\n        for idx in self.sampler:\n            group_id = self.group_ids[idx]\n            buffer_per_group[group_id].append(idx)\n            samples_per_group[group_id].append(idx)\n            if len(buffer_per_group[group_id]) == self.batch_size:\n                yield buffer_per_group[group_id]\n                num_batches += 1\n                del buffer_per_group[group_id]\n            assert len(buffer_per_group[group_id]) < self.batch_size\n\n        # now we have run out of elements that satisfy\n        # the group criteria, let's return the remaining\n        # elements so that the size of the sampler is\n        # deterministic\n        expected_num_batches = len(self)\n        num_remaining = expected_num_batches - num_batches\n        if num_remaining > 0:\n            # for the remaining batches, take first the buffers with largest number\n            # of elements\n            for group_id, _ in sorted(buffer_per_group.items(),\n                                      key=lambda x: len(x[1]), reverse=True):\n                remaining = self.batch_size - len(buffer_per_group[group_id])\n                samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)\n                buffer_per_group[group_id].extend(samples_from_group_id[:remaining])\n                assert 
len(buffer_per_group[group_id]) == self.batch_size\n                yield buffer_per_group[group_id]\n                num_remaining -= 1\n                if num_remaining == 0:\n                    break\n        assert num_remaining == 0\n\n    def __len__(self):\n        return len(self.sampler) // self.batch_size\n\n\ndef _compute_aspect_ratios_slow(dataset, indices=None):\n    print(\"Your dataset doesn't support the fast path for \"\n          \"computing the aspect ratios, so will iterate over \"\n          \"the full dataset and load every image instead. \"\n          \"This might take some time...\")\n    if indices is None:\n        indices = range(len(dataset))\n\n    class SubsetSampler(Sampler):\n        def __init__(self, indices):\n            self.indices = indices\n\n        def __iter__(self):\n            return iter(self.indices)\n\n        def __len__(self):\n            return len(self.indices)\n\n    sampler = SubsetSampler(indices)\n    data_loader = torch.utils.data.DataLoader(\n        dataset, batch_size=1, sampler=sampler,\n        num_workers=14,  # you might want to increase it for faster processing\n        collate_fn=lambda x: x[0])\n    aspect_ratios = []\n    with tqdm(total=len(dataset)) as pbar:\n        for _i, (img, _) in enumerate(data_loader):\n            pbar.update(1)\n            height, width = img.shape[-2:]\n            aspect_ratio = float(width) / float(height)\n            aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_custom_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        height, width = dataset.get_height_and_width(i)\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_coco_dataset(dataset, indices=None):\n    if indices is None:\n        indices = 
range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        img_info = dataset.coco.imgs[dataset.ids[i]]\n        aspect_ratio = float(img_info[\"width\"]) / float(img_info[\"height\"])\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_voc_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n    aspect_ratios = []\n    for i in indices:\n        # this doesn't load the data into memory, because PIL loads it lazily\n        width, height = Image.open(dataset.images[i]).size\n        aspect_ratio = float(width) / float(height)\n        aspect_ratios.append(aspect_ratio)\n    return aspect_ratios\n\n\ndef _compute_aspect_ratios_subset_dataset(dataset, indices=None):\n    if indices is None:\n        indices = range(len(dataset))\n\n    ds_indices = [dataset.indices[i] for i in indices]\n    return compute_aspect_ratios(dataset.dataset, ds_indices)\n\n\ndef compute_aspect_ratios(dataset, indices=None):\n    if hasattr(dataset, \"get_height_and_width\"):\n        return _compute_aspect_ratios_custom_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.CocoDetection):\n        return _compute_aspect_ratios_coco_dataset(dataset, indices)\n\n    if isinstance(dataset, torchvision.datasets.VOCDetection):\n        return _compute_aspect_ratios_voc_dataset(dataset, indices)\n\n    if isinstance(dataset, torch.utils.data.Subset):\n        return _compute_aspect_ratios_subset_dataset(dataset, indices)\n\n    # slow path\n    return _compute_aspect_ratios_slow(dataset, indices)\n\n\ndef _quantize(x, bins):\n    bins = copy.deepcopy(bins)\n    bins = sorted(bins)\n    # bisect_right：寻找y元素按顺序应该排在bins中哪个元素的右边，返回的是索引\n    quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))\n    return quantized\n\n\ndef create_aspect_ratio_groups(dataset, k=0):\n    # 计算所有数据集中的图片width/height比例\n    aspect_ratios = compute_aspect_ratios(dataset)\n    # 
将[0.5, 2]区间划分成2*k+1等份\n    bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]\n\n    # 统计所有图像比例在bins区间中的位置索引\n    groups = _quantize(aspect_ratios, bins)\n    # count number of elements per group\n    # 统计每个区间的频次\n    counts = np.unique(groups, return_counts=True)[1]\n    fbins = [0] + bins + [np.inf]\n    print(\"Using {} as bins for aspect ratio quantization\".format(fbins))\n    print(\"Count of instances per bin: {}\".format(counts))\n    return groups\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/train_utils/train_eval_utils.py",
    "content": "import sys\n\nfrom torch.cuda import amp\nimport torch.nn.functional as F\n\nfrom build_utils.utils import *\nfrom .coco_eval import CocoEvaluator\nfrom .coco_utils import get_coco_api_from_dataset\nimport train_utils.distributed_utils as utils\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch,\n                    print_freq, accumulate, img_size,\n                    grid_min, grid_max, gs,\n                    multi_scale=False, warmup=False, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    lr_scheduler = None\n    if epoch == 0 and warmup is True:  # 当训练第一轮（epoch=0）时，启用warmup训练方式，可理解为热身训练\n        warmup_factor = 1.0 / 1000\n        warmup_iters = min(1000, len(data_loader) - 1)\n\n        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)\n        accumulate = 1\n\n    mloss = torch.zeros(4).to(device)  # mean losses\n    now_lr = 0.\n    nb = len(data_loader)  # number of batches\n    # imgs: [batch_size, 3, img_size, img_size]\n    # targets: [num_obj, 6] , that number 6 means -> (img_index, obj_index, x, y, w, h)\n    # paths: list of img path\n    for i, (imgs, targets, paths, _, _) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):\n        # ni 统计从epoch0开始的所有batch数\n        ni = i + nb * epoch  # number integrated batches (since train start)\n        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0\n        targets = targets.to(device)\n\n        # Multi-Scale\n        if multi_scale:\n            # 每训练64张图片，就随机修改一次输入图片大小，\n            # 由于label已转为相对坐标，故缩放图片不影响label的值\n            if ni % accumulate == 0:  # adjust img_size (67% - 150%) every 1 batch\n                # 在给定最大最小输入尺寸范围内随机选取一个size(size为32的整数倍)\n                img_size = random.randrange(grid_min, 
grid_max + 1) * gs\n            sf = img_size / max(imgs.shape[2:])  # scale factor\n\n            # 如果图片最大边长不等于img_size, 则缩放图片，并将长和宽调整到32的整数倍\n            if sf != 1:\n                # gs: (pixels) grid size\n                ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)\n                imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)\n\n        # 混合精度训练上下文管理器，如果在CPU环境中不起任何作用\n        with amp.autocast(enabled=scaler is not None):\n            pred = model(imgs)\n\n            # loss\n            loss_dict = compute_loss(pred, targets, model)\n            losses = sum(loss for loss in loss_dict.values())\n\n        # reduce losses over all GPUs for logging purpose\n        loss_dict_reduced = utils.reduce_dict(loss_dict)\n        losses_reduced = sum(loss for loss in loss_dict_reduced.values())\n        loss_items = torch.cat((loss_dict_reduced[\"box_loss\"],\n                                loss_dict_reduced[\"obj_loss\"],\n                                loss_dict_reduced[\"class_loss\"],\n                                losses_reduced)).detach()\n        mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses\n\n        if not torch.isfinite(losses_reduced):\n            print('WARNING: non-finite loss, ending training ', loss_dict_reduced)\n            print(\"training image path: {}\".format(\",\".join(paths)))\n            sys.exit(1)\n\n        losses *= 1. 
/ accumulate  # scale loss\n\n        # backward\n        if scaler is not None:\n            scaler.scale(losses).backward()\n        else:\n            losses.backward()\n\n        # optimize\n        # 每训练64张图片更新一次权重\n        if ni % accumulate == 0:\n            if scaler is not None:\n                scaler.step(optimizer)\n                scaler.update()\n            else:\n                optimizer.step()\n            optimizer.zero_grad()\n\n        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)\n        now_lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(lr=now_lr)\n\n        if ni % accumulate == 0 and lr_scheduler is not None:  # 第一轮使用warmup训练方式\n            lr_scheduler.step()\n\n    return mloss, now_lr\n\n\n@torch.no_grad()\ndef evaluate(model, data_loader, coco=None, device=None):\n    cpu_device = torch.device(\"cpu\")\n    model.eval()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = \"Test: \"\n\n    if coco is None:\n        coco = get_coco_api_from_dataset(data_loader.dataset)\n    iou_types = _get_iou_types(model)\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n\n    for imgs, targets, paths, shapes, img_index in metric_logger.log_every(data_loader, 100, header):\n        imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0\n        # targets = targets.to(device)\n\n        # 当使用CPU时，跳过GPU相关指令\n        if device != torch.device(\"cpu\"):\n            torch.cuda.synchronize(device)\n\n        model_time = time.time()\n        pred = model(imgs)[0]  # only get inference result\n        pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False)\n        model_time = time.time() - model_time\n\n        outputs = []\n        for index, p in enumerate(pred):\n            if p is None:\n                p = torch.empty((0, 6), device=cpu_device)\n                boxes = torch.empty((0, 4), device=cpu_device)\n            else:\n        
        # xmin, ymin, xmax, ymax\n                boxes = p[:, :4]\n                # shapes: (h0, w0), ((h / h0, w / w0), pad)\n                # 将boxes信息还原回原图尺度，这样计算的mAP才是准确的\n                boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round()\n\n            # 注意这里传入的boxes格式必须是xmin, ymin, xmax, ymax，且为绝对坐标\n            info = {\"boxes\": boxes.to(cpu_device),\n                    \"labels\": p[:, 5].to(device=cpu_device, dtype=torch.int64),\n                    \"scores\": p[:, 4].to(cpu_device)}\n            outputs.append(info)\n\n        res = {img_id: output for img_id, output in zip(img_index, outputs)}\n\n        evaluator_time = time.time()\n        coco_evaluator.update(res)\n        evaluator_time = time.time() - evaluator_time\n        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)\n\n    # gather the stats from all processes\n    metric_logger.synchronize_between_processes()\n    print(\"Averaged stats:\", metric_logger)\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    result_info = coco_evaluator.coco_eval[iou_types[0]].stats.tolist()  # numpy to list\n\n    return result_info\n\n\ndef _get_iou_types(model):\n    model_without_ddp = model\n    if isinstance(model, torch.nn.parallel.DistributedDataParallel):\n        model_without_ddp = model.module\n    iou_types = [\"bbox\"]\n    return iou_types\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/trans_voc2yolo.py",
    "content": "\"\"\"\n本脚本有两个功能：\n1.将voc数据集标注信息(.xml)转为yolo标注格式(.txt)，并将图像文件复制到相应文件夹\n2.根据json标签文件，生成对应names标签(my_data_label.names)\n\"\"\"\nimport os\nfrom tqdm import tqdm\nfrom lxml import etree\nimport json\nimport shutil\n\n\n# voc数据集根目录以及版本\nvoc_root = \"/data/VOCdevkit\"\nvoc_version = \"VOC2012\"\n\n# 转换的训练集以及验证集对应txt文件\ntrain_txt = \"train.txt\"\nval_txt = \"val.txt\"\n\n# 转换后的文件保存目录\nsave_file_root = \"./my_yolo_dataset\"\n\n# label标签对应json文件\nlabel_json_path = './data/pascal_voc_classes.json'\n\n# 拼接出voc的images目录，xml目录，txt目录\nvoc_images_path = os.path.join(voc_root, voc_version, \"JPEGImages\")\nvoc_xml_path = os.path.join(voc_root, voc_version, \"Annotations\")\ntrain_txt_path = os.path.join(voc_root, voc_version, \"ImageSets\", \"Main\", train_txt)\nval_txt_path = os.path.join(voc_root, voc_version, \"ImageSets\", \"Main\", val_txt)\n\n# 检查文件/文件夹都是否存在\nassert os.path.exists(voc_images_path), \"VOC images path not exist...\"\nassert os.path.exists(voc_xml_path), \"VOC xml path not exist...\"\nassert os.path.exists(train_txt_path), \"VOC train txt file not exist...\"\nassert os.path.exists(val_txt_path), \"VOC val txt file not exist...\"\nassert os.path.exists(label_json_path), \"label_json_path does not exist...\"\nif os.path.exists(save_file_root) is False:\n    os.makedirs(save_file_root)\n\n\ndef parse_xml_to_dict(xml):\n    \"\"\"\n    将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict\n    Args：\n        xml: xml tree obtained by parsing XML file contents using lxml.etree\n\n    Returns:\n        Python dictionary holding XML contents.\n    \"\"\"\n\n    if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息\n        return {xml.tag: xml.text}\n\n    result = {}\n    for child in xml:\n        child_result = parse_xml_to_dict(child)  # 递归遍历标签信息\n        if child.tag != 'object':\n            result[child.tag] = child_result[child.tag]\n        else:\n            if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里\n                result[child.tag] = []\n  
          result[child.tag].append(child_result[child.tag])\n    return {xml.tag: result}\n\n\ndef translate_info(file_names: list, save_root: str, class_dict: dict, train_val='train'):\n    \"\"\"\n    将对应xml文件信息转为yolo中使用的txt文件信息\n    :param file_names:\n    :param save_root:\n    :param class_dict:\n    :param train_val:\n    :return:\n    \"\"\"\n    save_txt_path = os.path.join(save_root, train_val, \"labels\")\n    if os.path.exists(save_txt_path) is False:\n        os.makedirs(save_txt_path)\n    save_images_path = os.path.join(save_root, train_val, \"images\")\n    if os.path.exists(save_images_path) is False:\n        os.makedirs(save_images_path)\n\n    for file in tqdm(file_names, desc=\"translate {} file...\".format(train_val)):\n        # 检查下图像文件是否存在\n        img_path = os.path.join(voc_images_path, file + \".jpg\")\n        assert os.path.exists(img_path), \"file:{} not exist...\".format(img_path)\n\n        # 检查xml文件是否存在\n        xml_path = os.path.join(voc_xml_path, file + \".xml\")\n        assert os.path.exists(xml_path), \"file:{} not exist...\".format(xml_path)\n\n        # read xml\n        with open(xml_path) as fid:\n            xml_str = fid.read()\n        xml = etree.fromstring(xml_str)\n        data = parse_xml_to_dict(xml)[\"annotation\"]\n        img_height = int(data[\"size\"][\"height\"])\n        img_width = int(data[\"size\"][\"width\"])\n\n        # write object info into txt\n        assert \"object\" in data.keys(), \"file: '{}' lack of object key.\".format(xml_path)\n        if len(data[\"object\"]) == 0:\n            # 如果xml文件中没有目标就直接忽略该样本\n            print(\"Warning: in '{}' xml, there are no objects.\".format(xml_path))\n            continue\n\n        with open(os.path.join(save_txt_path, file + \".txt\"), \"w\") as f:\n            for index, obj in enumerate(data[\"object\"]):\n                # 获取每个object的box信息\n                xmin = float(obj[\"bndbox\"][\"xmin\"])\n                xmax = 
float(obj[\"bndbox\"][\"xmax\"])\n                ymin = float(obj[\"bndbox\"][\"ymin\"])\n                ymax = float(obj[\"bndbox\"][\"ymax\"])\n                class_name = obj[\"name\"]\n                class_index = class_dict[class_name] - 1  # 目标id从0开始\n\n                # 进一步检查数据，有的标注信息中可能有w或h为0的情况，这样的数据会导致计算回归loss为nan\n                if xmax <= xmin or ymax <= ymin:\n                    print(\"Warning: in '{}' xml, there are some bbox w/h <=0\".format(xml_path))\n                    continue\n\n                # 将box信息转换到yolo格式\n                xcenter = xmin + (xmax - xmin) / 2\n                ycenter = ymin + (ymax - ymin) / 2\n                w = xmax - xmin\n                h = ymax - ymin\n\n                # 绝对坐标转相对坐标，保存6位小数\n                xcenter = round(xcenter / img_width, 6)\n                ycenter = round(ycenter / img_height, 6)\n                w = round(w / img_width, 6)\n                h = round(h / img_height, 6)\n\n                info = [str(i) for i in [class_index, xcenter, ycenter, w, h]]\n\n                if index == 0:\n                    f.write(\" \".join(info))\n                else:\n                    f.write(\"\\n\" + \" \".join(info))\n\n        # copy image into save_images_path\n        path_copy_to = os.path.join(save_images_path, img_path.split(os.sep)[-1])\n        if os.path.exists(path_copy_to) is False:\n            shutil.copyfile(img_path, path_copy_to)\n\n\ndef create_class_names(class_dict: dict):\n    keys = class_dict.keys()\n    with open(\"./data/my_data_label.names\", \"w\") as w:\n        for index, k in enumerate(keys):\n            if index + 1 == len(keys):\n                w.write(k)\n            else:\n                w.write(k + \"\\n\")\n\n\ndef main():\n    # read class_indict\n    json_file = open(label_json_path, 'r')\n    class_dict = json.load(json_file)\n\n    # 读取train.txt中的所有行信息，删除空行\n    with open(train_txt_path, \"r\") as r:\n        train_file_names = [i for i in 
r.read().splitlines() if len(i.strip()) > 0]\n    # voc信息转yolo，并将图像文件复制到相应文件夹\n    translate_info(train_file_names, save_file_root, class_dict, \"train\")\n\n    # 读取val.txt中的所有行信息，删除空行\n    with open(val_txt_path, \"r\") as r:\n        val_file_names = [i for i in r.read().splitlines() if len(i.strip()) > 0]\n    # voc信息转yolo，并将图像文件复制到相应文件夹\n    translate_info(val_file_names, save_file_root, class_dict, \"val\")\n\n    # 创建my_data_label.names文件\n    create_class_names(class_dict)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "pytorch_object_detection/yolov3_spp/validation.py",
    "content": "\"\"\"\n该脚本用于调用训练好的模型权重去计算验证集/测试集的COCO指标\n以及每个类别的mAP(IoU=0.5)\n\"\"\"\nimport json\n\nfrom models import *\nfrom build_utils.datasets import *\nfrom build_utils.utils import *\nfrom train_utils import get_coco_api_from_dataset, CocoEvaluator\n\n\ndef summarize(self, catId=None):\n    \"\"\"\n    Compute and display summary metrics for evaluation results.\n    Note this functin can *only* be applied on the default parameter setting\n    \"\"\"\n\n    def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):\n        p = self.params\n        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \\\n            if iouThr is None else '{:0.2f}'.format(iouThr)\n\n        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]\n        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]\n\n        if ap == 1:\n            # dimension of precision: [TxRxKxAxM]\n            s = self.eval['precision']\n            # IoU\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, :, catId, aind, mind]\n            else:\n                s = s[:, :, :, aind, mind]\n\n        else:\n            # dimension of recall: [TxKxAxM]\n            s = self.eval['recall']\n            if iouThr is not None:\n                t = np.where(iouThr == p.iouThrs)[0]\n                s = s[t]\n\n            if isinstance(catId, int):\n                s = s[:, catId, aind, mind]\n            else:\n                s = s[:, :, aind, mind]\n\n        if len(s[s > -1]) == 0:\n            mean_s = -1\n        else:\n            mean_s = np.mean(s[s > -1])\n\n        print_string = iStr.format(titleStr, typeStr, 
iouStr, areaRng, maxDets, mean_s)\n        return mean_s, print_string\n\n    stats, print_list = [0] * 12, [\"\"] * 12\n    stats[0], print_list[0] = _summarize(1)\n    stats[1], print_list[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])\n    stats[2], print_list[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])\n    stats[3], print_list[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[4], print_list[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[5], print_list[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])\n    stats[6], print_list[6] = _summarize(0, maxDets=self.params.maxDets[0])\n    stats[7], print_list[7] = _summarize(0, maxDets=self.params.maxDets[1])\n    stats[8], print_list[8] = _summarize(0, maxDets=self.params.maxDets[2])\n    stats[9], print_list[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])\n    stats[10], print_list[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])\n    stats[11], print_list[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])\n\n    print_info = \"\\n\".join(print_list)\n\n    if not self.eval:\n        raise Exception('Please run accumulate() first')\n\n    return stats, print_info\n\n\ndef main(parser_data):\n    device = torch.device(parser_data.device if torch.cuda.is_available() else \"cpu\")\n    print(\"Using {} device training.\".format(device.type))\n\n    # read class_indict\n    label_json_path = './data/pascal_voc_classes.json'\n    assert os.path.exists(label_json_path), \"json file {} dose not exist.\".format(label_json_path)\n    with open(label_json_path, 'r') as f:\n        class_dict = json.load(f)\n\n    category_index = {v: k for k, v in class_dict.items()}\n\n    data_dict = parse_data_cfg(parser_data.data)\n    test_path = data_dict[\"valid\"]\n\n    # 注意这里的collate_fn是自定义的，因为读取的数据包括image和targets，不能直接使用默认的方法合成batch\n    batch_size = 
parser_data.batch_size\n    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers\n    print('Using %g dataloader workers' % nw)\n\n    # load validation data set\n    val_dataset = LoadImagesAndLabels(test_path, parser_data.img_size, batch_size,\n                                      hyp=parser_data.hyp,\n                                      rect=True)  # 将每个batch的图像调整到合适大小，可减少运算量(并不是512x512标准尺寸)\n\n    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,\n                                                     batch_size=batch_size,\n                                                     shuffle=False,\n                                                     num_workers=nw,\n                                                     pin_memory=True,\n                                                     collate_fn=val_dataset.collate_fn)\n\n    # create model\n    model = Darknet(parser_data.cfg, parser_data.img_size)\n    weights_dict = torch.load(parser_data.weights, map_location='cpu')\n    weights_dict = weights_dict[\"model\"] if \"model\" in weights_dict else weights_dict\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # evaluate on the test dataset\n    coco = get_coco_api_from_dataset(val_dataset)\n    iou_types = [\"bbox\"]\n    coco_evaluator = CocoEvaluator(coco, iou_types)\n    cpu_device = torch.device(\"cpu\")\n\n    model.eval()\n    with torch.no_grad():\n        for imgs, targets, paths, shapes, img_index in tqdm(val_dataset_loader, desc=\"validation...\"):\n            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0\n\n            pred = model(imgs)[0]  # only get inference result\n            pred = non_max_suppression(pred, conf_thres=0.01, iou_thres=0.6, multi_label=False)\n\n            outputs = []\n            for index, p in enumerate(pred):\n                if p is None:\n                    p = torch.empty((0, 6), device=cpu_device)\n                    
boxes = torch.empty((0, 4), device=cpu_device)\n                else:\n                    # xmin, ymin, xmax, ymax\n                    boxes = p[:, :4]\n                    # shapes: (h0, w0), ((h / h0, w / w0), pad)\n                    # 将boxes信息还原回原图尺度，这样计算的mAP才是准确的\n                    boxes = scale_coords(imgs[index].shape[1:], boxes, shapes[index][0]).round()\n\n                # 注意这里传入的boxes格式必须是xmin, ymin, xmax, ymax，且为绝对坐标\n                info = {\"boxes\": boxes.to(cpu_device),\n                        \"labels\": p[:, 5].to(device=cpu_device, dtype=torch.int64),\n                        \"scores\": p[:, 4].to(cpu_device)}\n                outputs.append(info)\n\n            res = {img_id: output for img_id, output in zip(img_index, outputs)}\n\n            coco_evaluator.update(res)\n\n    coco_evaluator.synchronize_between_processes()\n\n    # accumulate predictions from all images\n    coco_evaluator.accumulate()\n    coco_evaluator.summarize()\n\n    coco_eval = coco_evaluator.coco_eval[\"bbox\"]\n    # calculate COCO info for all classes\n    coco_stats, print_coco = summarize(coco_eval)\n\n    # calculate voc info for every classes(IoU=0.5)\n    voc_map_info_list = []\n    for i in range(len(category_index)):\n        stats, _ = summarize(coco_eval, catId=i)\n        voc_map_info_list.append(\" {:15}: {}\".format(category_index[i + 1], stats[1]))\n\n    print_voc = \"\\n\".join(voc_map_info_list)\n    print(print_voc)\n\n    # 将验证结果保存至txt文件中\n    with open(\"record_mAP.txt\", \"w\") as f:\n        record_lines = [\"COCO results:\",\n                        print_coco,\n                        \"\",\n                        \"mAP(IoU=0.5) for each category:\",\n                        print_voc]\n        f.write(\"\\n\".join(record_lines))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 使用设备类型\n    parser.add_argument('--device', default='cuda', 
help='device')\n\n    # 检测目标类别数\n    parser.add_argument('--num-classes', type=int, default='20', help='number of classes')\n\n    parser.add_argument('--cfg', type=str, default='cfg/my_yolov3.cfg', help=\"*.cfg path\")\n    parser.add_argument('--data', type=str, default='data/my_data.data', help='*.data path')\n    parser.add_argument('--hyp', type=str, default='cfg/hyp.yaml', help='hyperparameters path')\n    parser.add_argument('--img-size', type=int, default=512, help='test size')\n\n    # 训练好的权重文件\n    parser.add_argument('--weights', default='./weights/yolov3spp-voc-512.pt', type=str, help='training weights')\n\n    # batch size\n    parser.add_argument('--batch_size', default=1, type=int, metavar='N',\n                        help='batch size when validation.')\n\n    args = parser.parse_args()\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/README.md",
    "content": "# DeepLabV3(Rethinking Atrous Convolution for Semantic Image Segmentation)\n\n## 该项目主要是来自pytorch官方torchvision模块中的源码\n* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10\n* Ubuntu或Centos(Windows暂不支持多GPU训练)\n* 最好使用GPU训练\n* 详细环境配置见```requirements.txt```\n\n## 文件结构：\n```\n  ├── src: 模型的backbone以及DeepLabv3的搭建\n  ├── train_utils: 训练、验证以及多GPU训练相关模块\n  ├── my_dataset.py: 自定义dataset用于读取VOC数据集\n  ├── train.py: 以deeplabv3_resnet50为例进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件\n  └── pascal_voc_classes.json: pascal_voc标签文件\n```\n\n## 预训练权重下载地址：\n* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)\n* deeplabv3_resnet50: https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth\n* deeplabv3_resnet101: https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth\n* deeplabv3_mobilenetv3_large_coco: https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth\n* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```deeplabv3_resnet50_coco.pth```文件，\n  不是```deeplabv3_resnet50_coco-cd0a2569.pth```\n \n \n## 数据集，本例程使用的是PASCAL VOC2012数据集\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)\n* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```\n\n## 注意事项\n* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**\n* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。\n* 
使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可，其他代码尽量不要改动\n\n## 如果对DeepLabV3原理不是很理解可参考我的bilibili\n* https://www.bilibili.com/video/BV1Jb4y1q7j7\n\n\n## 进一步了解该项目，以及对DeepLabV3代码的分析可参考我的bilibili\n* https://www.bilibili.com/video/BV1TD4y1c7Wx\n\n## Pytorch官方实现的DeeplabV3网络框架图\n![deeplabv3_resnet50_pytorch](./deeplabv3_resnet50.png)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/get_palette.py",
    "content": "import json\nimport numpy as np\nfrom PIL import Image\n\n# 读取mask标签\ntarget = Image.open(\"./2007_001288.png\")\n# 获取调色板\npalette = target.getpalette()\npalette = np.reshape(palette, (-1, 3)).tolist()\n# 转换成字典形式\npd = dict((i, color) for i, color in enumerate(palette))\n\njson_str = json.dumps(pd)\nwith open(\"palette.json\", \"w\") as f:\n    f.write(json_str)\n\n# target = np.array(target)\n# print(target)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/my_dataset.py",
    "content": "import os\n\nimport torch.utils.data as data\nfrom PIL import Image\n\n\nclass VOCSegmentation(data.Dataset):\n    def __init__(self, voc_root, year=\"2012\", transforms=None, txt_name: str = \"train.txt\"):\n        super(VOCSegmentation, self).__init__()\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        assert os.path.exists(root), \"path '{}' does not exist.\".format(root)\n        image_dir = os.path.join(root, 'JPEGImages')\n        mask_dir = os.path.join(root, 'SegmentationClass')\n\n        txt_path = os.path.join(root, \"ImageSets\", \"Segmentation\", txt_name)\n        assert os.path.exists(txt_path), \"file '{}' does not exist.\".format(txt_path)\n        with open(os.path.join(txt_path), \"r\") as f:\n            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]\n\n        self.images = [os.path.join(image_dir, x + \".jpg\") for x in file_names]\n        self.masks = [os.path.join(mask_dir, x + \".png\") for x in file_names]\n        assert (len(self.images) == len(self.masks))\n        self.transforms = transforms\n\n    def __getitem__(self, index):\n        \"\"\"\n        Args:\n            index (int): Index\n\n        Returns:\n            tuple: (image, target) where target is the image segmentation.\n        \"\"\"\n        img = Image.open(self.images[index]).convert('RGB')\n        target = Image.open(self.masks[index])\n\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.images)\n\n    @staticmethod\n    def collate_fn(batch):\n        images, targets = list(zip(*batch))\n        batched_imgs = cat_list(images, fill_value=0)\n        batched_targets = cat_list(targets, fill_value=255)\n        return batched_imgs, batched_targets\n\n\ndef cat_list(images, fill_value=0):\n  
  max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))\n    batch_shape = (len(images),) + max_size\n    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)\n    for img, pad_img in zip(images, batched_imgs):\n        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)\n    return batched_imgs\n\n\n# dataset = VOCSegmentation(voc_root=\"/data/\", transforms=get_transform(train=True))\n# d1 = dataset[0]\n# print(d1)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/palette.json",
    "content": "{\"0\": [0, 0, 0], \"1\": [128, 0, 0], \"2\": [0, 128, 0], \"3\": [128, 128, 0], \"4\": [0, 0, 128], \"5\": [128, 0, 128], \"6\": [0, 128, 128], \"7\": [128, 128, 128], \"8\": [64, 0, 0], \"9\": [192, 0, 0], \"10\": [64, 128, 0], \"11\": [192, 128, 0], \"12\": [64, 0, 128], \"13\": [192, 0, 128], \"14\": [64, 128, 128], \"15\": [192, 128, 128], \"16\": [0, 64, 0], \"17\": [128, 64, 0], \"18\": [0, 192, 0], \"19\": [128, 192, 0], \"20\": [0, 64, 128], \"21\": [128, 64, 128], \"22\": [0, 192, 128], \"23\": [128, 192, 128], \"24\": [64, 64, 0], \"25\": [192, 64, 0], \"26\": [64, 192, 0], \"27\": [192, 192, 0], \"28\": [64, 64, 128], \"29\": [192, 64, 128], \"30\": [64, 192, 128], \"31\": [192, 192, 128], \"32\": [0, 0, 64], \"33\": [128, 0, 64], \"34\": [0, 128, 64], \"35\": [128, 128, 64], \"36\": [0, 0, 192], \"37\": [128, 0, 192], \"38\": [0, 128, 192], \"39\": [128, 128, 192], \"40\": [64, 0, 64], \"41\": [192, 0, 64], \"42\": [64, 128, 64], \"43\": [192, 128, 64], \"44\": [64, 0, 192], \"45\": [192, 0, 192], \"46\": [64, 128, 192], \"47\": [192, 128, 192], \"48\": [0, 64, 64], \"49\": [128, 64, 64], \"50\": [0, 192, 64], \"51\": [128, 192, 64], \"52\": [0, 64, 192], \"53\": [128, 64, 192], \"54\": [0, 192, 192], \"55\": [128, 192, 192], \"56\": [64, 64, 64], \"57\": [192, 64, 64], \"58\": [64, 192, 64], \"59\": [192, 192, 64], \"60\": [64, 64, 192], \"61\": [192, 64, 192], \"62\": [64, 192, 192], \"63\": [192, 192, 192], \"64\": [32, 0, 0], \"65\": [160, 0, 0], \"66\": [32, 128, 0], \"67\": [160, 128, 0], \"68\": [32, 0, 128], \"69\": [160, 0, 128], \"70\": [32, 128, 128], \"71\": [160, 128, 128], \"72\": [96, 0, 0], \"73\": [224, 0, 0], \"74\": [96, 128, 0], \"75\": [224, 128, 0], \"76\": [96, 0, 128], \"77\": [224, 0, 128], \"78\": [96, 128, 128], \"79\": [224, 128, 128], \"80\": [32, 64, 0], \"81\": [160, 64, 0], \"82\": [32, 192, 0], \"83\": [160, 192, 0], \"84\": [32, 64, 128], \"85\": [160, 64, 128], \"86\": [32, 192, 128], \"87\": [160, 
192, 128], \"88\": [96, 64, 0], \"89\": [224, 64, 0], \"90\": [96, 192, 0], \"91\": [224, 192, 0], \"92\": [96, 64, 128], \"93\": [224, 64, 128], \"94\": [96, 192, 128], \"95\": [224, 192, 128], \"96\": [32, 0, 64], \"97\": [160, 0, 64], \"98\": [32, 128, 64], \"99\": [160, 128, 64], \"100\": [32, 0, 192], \"101\": [160, 0, 192], \"102\": [32, 128, 192], \"103\": [160, 128, 192], \"104\": [96, 0, 64], \"105\": [224, 0, 64], \"106\": [96, 128, 64], \"107\": [224, 128, 64], \"108\": [96, 0, 192], \"109\": [224, 0, 192], \"110\": [96, 128, 192], \"111\": [224, 128, 192], \"112\": [32, 64, 64], \"113\": [160, 64, 64], \"114\": [32, 192, 64], \"115\": [160, 192, 64], \"116\": [32, 64, 192], \"117\": [160, 64, 192], \"118\": [32, 192, 192], \"119\": [160, 192, 192], \"120\": [96, 64, 64], \"121\": [224, 64, 64], \"122\": [96, 192, 64], \"123\": [224, 192, 64], \"124\": [96, 64, 192], \"125\": [224, 64, 192], \"126\": [96, 192, 192], \"127\": [224, 192, 192], \"128\": [0, 32, 0], \"129\": [128, 32, 0], \"130\": [0, 160, 0], \"131\": [128, 160, 0], \"132\": [0, 32, 128], \"133\": [128, 32, 128], \"134\": [0, 160, 128], \"135\": [128, 160, 128], \"136\": [64, 32, 0], \"137\": [192, 32, 0], \"138\": [64, 160, 0], \"139\": [192, 160, 0], \"140\": [64, 32, 128], \"141\": [192, 32, 128], \"142\": [64, 160, 128], \"143\": [192, 160, 128], \"144\": [0, 96, 0], \"145\": [128, 96, 0], \"146\": [0, 224, 0], \"147\": [128, 224, 0], \"148\": [0, 96, 128], \"149\": [128, 96, 128], \"150\": [0, 224, 128], \"151\": [128, 224, 128], \"152\": [64, 96, 0], \"153\": [192, 96, 0], \"154\": [64, 224, 0], \"155\": [192, 224, 0], \"156\": [64, 96, 128], \"157\": [192, 96, 128], \"158\": [64, 224, 128], \"159\": [192, 224, 128], \"160\": [0, 32, 64], \"161\": [128, 32, 64], \"162\": [0, 160, 64], \"163\": [128, 160, 64], \"164\": [0, 32, 192], \"165\": [128, 32, 192], \"166\": [0, 160, 192], \"167\": [128, 160, 192], \"168\": [64, 32, 64], \"169\": [192, 32, 64], \"170\": [64, 160, 64], \"171\": 
[192, 160, 64], \"172\": [64, 32, 192], \"173\": [192, 32, 192], \"174\": [64, 160, 192], \"175\": [192, 160, 192], \"176\": [0, 96, 64], \"177\": [128, 96, 64], \"178\": [0, 224, 64], \"179\": [128, 224, 64], \"180\": [0, 96, 192], \"181\": [128, 96, 192], \"182\": [0, 224, 192], \"183\": [128, 224, 192], \"184\": [64, 96, 64], \"185\": [192, 96, 64], \"186\": [64, 224, 64], \"187\": [192, 224, 64], \"188\": [64, 96, 192], \"189\": [192, 96, 192], \"190\": [64, 224, 192], \"191\": [192, 224, 192], \"192\": [32, 32, 0], \"193\": [160, 32, 0], \"194\": [32, 160, 0], \"195\": [160, 160, 0], \"196\": [32, 32, 128], \"197\": [160, 32, 128], \"198\": [32, 160, 128], \"199\": [160, 160, 128], \"200\": [96, 32, 0], \"201\": [224, 32, 0], \"202\": [96, 160, 0], \"203\": [224, 160, 0], \"204\": [96, 32, 128], \"205\": [224, 32, 128], \"206\": [96, 160, 128], \"207\": [224, 160, 128], \"208\": [32, 96, 0], \"209\": [160, 96, 0], \"210\": [32, 224, 0], \"211\": [160, 224, 0], \"212\": [32, 96, 128], \"213\": [160, 96, 128], \"214\": [32, 224, 128], \"215\": [160, 224, 128], \"216\": [96, 96, 0], \"217\": [224, 96, 0], \"218\": [96, 224, 0], \"219\": [224, 224, 0], \"220\": [96, 96, 128], \"221\": [224, 96, 128], \"222\": [96, 224, 128], \"223\": [224, 224, 128], \"224\": [32, 32, 64], \"225\": [160, 32, 64], \"226\": [32, 160, 64], \"227\": [160, 160, 64], \"228\": [32, 32, 192], \"229\": [160, 32, 192], \"230\": [32, 160, 192], \"231\": [160, 160, 192], \"232\": [96, 32, 64], \"233\": [224, 32, 64], \"234\": [96, 160, 64], \"235\": [224, 160, 64], \"236\": [96, 32, 192], \"237\": [224, 32, 192], \"238\": [96, 160, 192], \"239\": [224, 160, 192], \"240\": [32, 96, 64], \"241\": [160, 96, 64], \"242\": [32, 224, 64], \"243\": [160, 224, 64], \"244\": [32, 96, 192], \"245\": [160, 96, 192], \"246\": [32, 224, 192], \"247\": [160, 224, 192], \"248\": [96, 96, 64], \"249\": [224, 96, 64], \"250\": [96, 224, 64], \"251\": [224, 224, 64], \"252\": [96, 96, 192], \"253\": [224, 96, 
192], \"254\": [96, 224, 192], \"255\": [224, 224, 192]}"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 1,\n    \"bicycle\": 2,\n    \"bird\": 3,\n    \"boat\": 4,\n    \"bottle\": 5,\n    \"bus\": 6,\n    \"car\": 7,\n    \"cat\": 8,\n    \"chair\": 9,\n    \"cow\": 10,\n    \"diningtable\": 11,\n    \"dog\": 12,\n    \"horse\": 13,\n    \"motorbike\": 14,\n    \"person\": 15,\n    \"pottedplant\": 16,\n    \"sheep\": 17,\n    \"sofa\": 18,\n    \"train\": 19,\n    \"tvmonitor\": 20\n}"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nfrom torchvision import transforms\nimport numpy as np\nfrom PIL import Image\n\nfrom src import deeplabv3_resnet50\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    aux = False  # inference time not need aux_classifier\n    classes = 20\n    weights_path = \"./save_weights/model_29.pth\"\n    img_path = \"./test.jpg\"\n    palette_path = \"./palette.json\"\n    assert os.path.exists(weights_path), f\"weights {weights_path} not found.\"\n    assert os.path.exists(img_path), f\"image {img_path} not found.\"\n    assert os.path.exists(palette_path), f\"palette {palette_path} not found.\"\n    with open(palette_path, \"rb\") as f:\n        pallette_dict = json.load(f)\n        pallette = []\n        for v in pallette_dict.values():\n            pallette += v\n\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = deeplabv3_resnet50(aux=aux, num_classes=classes+1)\n\n    # delete weights about aux_classifier\n    weights_dict = torch.load(weights_path, map_location='cpu')['model']\n    for k in list(weights_dict.keys()):\n        if \"aux\" in k:\n            del weights_dict[k]\n\n    # load weights\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # load image\n    original_img = Image.open(img_path)\n\n    # from pil image to tensor and normalize\n    data_transform = transforms.Compose([transforms.Resize(520),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                                                              std=(0.229, 0.224, 0.225))])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, 
dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init model\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        output = model(img.to(device))\n        t_end = time_synchronized()\n        print(\"inference time: {}\".format(t_end - t_start))\n\n        prediction = output['out'].argmax(1).squeeze(0)\n        prediction = prediction.to(\"cpu\").numpy().astype(np.uint8)\n        mask = Image.fromarray(prediction)\n        mask.putpalette(pallette)\n        mask.save(\"test_result.png\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/requirements.txt",
    "content": "numpy==1.22.0\ntorch==1.10.0\ntorchvision==0.11.1\nPillow\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/results20211027-104607.txt",
    "content": "[epoch: 0]\ntrain_loss: 0.7098\nlr: 0.000100\nglobal correct: 94.7\naverage row correct: ['97.0', '93.9', '86.4', '93.1', '74.4', '64.0', '97.9', '84.7', '95.7', '63.8', '92.2', '68.8', '88.8', '90.5', '93.0', '95.3', '75.3', '94.2', '83.6', '91.9', '75.0']\nIoU: ['93.9', '91.5', '42.6', '88.9', '65.3', '60.4', '95.9', '76.3', '90.7', '50.9', '87.3', '54.1', '86.1', '83.0', '87.2', '89.3', '64.3', '91.0', '58.9', '84.5', '73.6']\nmean IoU: 76.9\n\n[epoch: 1]\ntrain_loss: 0.6005\nlr: 0.000077\nglobal correct: 94.7\naverage row correct: ['96.1', '96.3', '84.1', '95.1', '84.1', '81.4', '98.6', '85.8', '96.6', '68.7', '91.9', '71.3', '93.9', '91.4', '96.4', '95.5', '81.0', '94.1', '85.4', '94.9', '83.6']\nIoU: ['93.9', '92.3', '42.2', '88.8', '69.1', '71.2', '96.1', '75.1', '91.9', '48.9', '87.8', '56.0', '87.9', '85.7', '89.2', '89.5', '63.6', '90.3', '56.2', '85.3', '79.8']\nmean IoU: 78.1\n\n[epoch: 2]\ntrain_loss: 0.5840\nlr: 0.000054\nglobal correct: 94.8\naverage row correct: ['96.2', '95.5', '85.8', '94.6', '85.5', '83.7', '98.8', '87.5', '96.3', '71.4', '92.5', '72.8', '93.1', '91.9', '96.7', '94.9', '81.5', '95.3', '82.8', '95.3', '84.1']\nIoU: ['94.0', '91.2', '42.7', '88.3', '69.2', '72.7', '96.4', '74.8', '92.0', '49.8', '87.5', '58.3', '87.3', '85.0', '89.3', '89.2', '62.6', '89.6', '58.1', '84.8', '80.3']\nmean IoU: 78.2\n\n[epoch: 3]\ntrain_loss: 0.5637\nlr: 0.000029\nglobal correct: 94.8\naverage row correct: ['96.1', '95.9', '81.7', '94.8', '86.5', '79.4', '99.0', '89.1', '95.8', '71.4', '93.8', '71.0', '93.4', '92.4', '97.3', '94.9', '80.4', '96.9', '83.3', '94.7', '84.4']\nIoU: ['94.0', '89.5', '41.8', '87.6', '69.0', '70.4', '96.0', '75.9', '92.1', '49.7', '87.3', '58.1', '86.2', '83.9', '88.7', '89.2', '63.7', '88.8', '57.7', '85.3', '79.9']\nmean IoU: 77.8\n\n[epoch: 4]\ntrain_loss: 0.5779\nlr: 0.000000\nglobal correct: 94.8\naverage row correct: ['96.3', '93.6', '85.9', '95.1', '82.6', '83.8', '98.5', '90.0', '95.9', '71.1', 
'93.2', '68.4', '92.6', '93.9', '95.9', '94.5', '82.8', '96.3', '82.8', '94.5', '86.4']\nIoU: ['94.1', '91.8', '42.5', '88.5', '67.8', '72.1', '96.6', '78.3', '92.0', '49.8', '88.3', '58.8', '86.7', '84.9', '89.0', '89.5', '61.0', '89.1', '56.6', '84.6', '80.2']\nmean IoU: 78.2\n\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/src/__init__.py",
    "content": "from .deeplabv3_model import deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenetv3_large\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/src/deeplabv3_model.py",
    "content": "from collections import OrderedDict\n\nfrom typing import Dict, List\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom .resnet_backbone import resnet50, resnet101\nfrom .mobilenet_backbone import mobilenet_v3_large\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n\n    Args:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    _version = 2\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n\n        # 重新构建backbone，将没有使用到的模块全部删掉\n        layers = OrderedDict()\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n   
     super(IntermediateLayerGetter, self).__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        out = OrderedDict()\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass DeepLabV3(nn.Module):\n    \"\"\"\n    Implements DeepLabV3 model from\n    `\"Rethinking Atrous Convolution for Semantic Image Segmentation\"\n    <https://arxiv.org/abs/1706.05587>`_.\n\n    Args:\n        backbone (nn.Module): the network used to compute the features for the model.\n            The backbone should return an OrderedDict[Tensor], with the key being\n            \"out\" for the last feature map used, and \"aux\" if an auxiliary classifier\n            is used.\n        classifier (nn.Module): module that takes the \"out\" element returned from\n            the backbone and returns a dense prediction.\n        aux_classifier (nn.Module, optional): auxiliary classifier used during training\n    \"\"\"\n    __constants__ = ['aux_classifier']\n\n    def __init__(self, backbone, classifier, aux_classifier=None):\n        super(DeepLabV3, self).__init__()\n        self.backbone = backbone\n        self.classifier = classifier\n        self.aux_classifier = aux_classifier\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        input_shape = x.shape[-2:]\n        # contract: features is a dict of tensors\n        features = self.backbone(x)\n\n        result = OrderedDict()\n        x = features[\"out\"]\n        x = self.classifier(x)\n        # 使用双线性插值还原回原图尺度\n        x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)\n        result[\"out\"] = x\n\n        if self.aux_classifier is not None:\n            x = features[\"aux\"]\n            x = self.aux_classifier(x)\n            # 使用双线性插值还原回原图尺度\n  
          x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)\n            result[\"aux\"] = x\n\n        return result\n\n\nclass FCNHead(nn.Sequential):\n    def __init__(self, in_channels, channels):\n        inter_channels = in_channels // 4\n        super(FCNHead, self).__init__(\n            nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),\n            nn.BatchNorm2d(inter_channels),\n            nn.ReLU(),\n            nn.Dropout(0.1),\n            nn.Conv2d(inter_channels, channels, 1)\n        )\n\n\nclass ASPPConv(nn.Sequential):\n    def __init__(self, in_channels: int, out_channels: int, dilation: int) -> None:\n        super(ASPPConv, self).__init__(\n            nn.Conv2d(in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False),\n            nn.BatchNorm2d(out_channels),\n            nn.ReLU()\n        )\n\n\nclass ASPPPooling(nn.Sequential):\n    def __init__(self, in_channels: int, out_channels: int) -> None:\n        super(ASPPPooling, self).__init__(\n            nn.AdaptiveAvgPool2d(1),\n            nn.Conv2d(in_channels, out_channels, 1, bias=False),\n            nn.BatchNorm2d(out_channels),\n            nn.ReLU()\n        )\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        size = x.shape[-2:]\n        for mod in self:\n            x = mod(x)\n        return F.interpolate(x, size=size, mode='bilinear', align_corners=False)\n\n\nclass ASPP(nn.Module):\n    def __init__(self, in_channels: int, atrous_rates: List[int], out_channels: int = 256) -> None:\n        super(ASPP, self).__init__()\n        modules = [\n            nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False),\n                          nn.BatchNorm2d(out_channels),\n                          nn.ReLU())\n        ]\n\n        rates = tuple(atrous_rates)\n        for rate in rates:\n            modules.append(ASPPConv(in_channels, out_channels, rate))\n\n        
modules.append(ASPPPooling(in_channels, out_channels))\n\n        self.convs = nn.ModuleList(modules)\n\n        self.project = nn.Sequential(\n            nn.Conv2d(len(self.convs) * out_channels, out_channels, 1, bias=False),\n            nn.BatchNorm2d(out_channels),\n            nn.ReLU(),\n            nn.Dropout(0.5)\n        )\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        _res = []\n        for conv in self.convs:\n            _res.append(conv(x))\n        res = torch.cat(_res, dim=1)\n        return self.project(res)\n\n\nclass DeepLabHead(nn.Sequential):\n    def __init__(self, in_channels: int, num_classes: int) -> None:\n        super(DeepLabHead, self).__init__(\n            ASPP(in_channels, [12, 24, 36]),\n            nn.Conv2d(256, 256, 3, padding=1, bias=False),\n            nn.BatchNorm2d(256),\n            nn.ReLU(),\n            nn.Conv2d(256, num_classes, 1)\n        )\n\n\ndef deeplabv3_resnet50(aux, num_classes=21, pretrain_backbone=False):\n    # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'\n    # 'deeplabv3_resnet50_coco': 'https://download.pytorch.org/models/deeplabv3_resnet50_coco-cd0a2569.pth'\n    backbone = resnet50(replace_stride_with_dilation=[False, True, True])\n\n    if pretrain_backbone:\n        # 载入resnet50 backbone预训练权重\n        backbone.load_state_dict(torch.load(\"resnet50.pth\", map_location='cpu'))\n\n    out_inplanes = 2048\n    aux_inplanes = 1024\n\n    return_layers = {'layer4': 'out'}\n    if aux:\n        return_layers['layer3'] = 'aux'\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    aux_classifier = None\n    # why using aux: https://github.com/pytorch/vision/issues/4292\n    if aux:\n        aux_classifier = FCNHead(aux_inplanes, num_classes)\n\n    classifier = DeepLabHead(out_inplanes, num_classes)\n\n    model = DeepLabV3(backbone, classifier, aux_classifier)\n\n    return model\n\n\ndef deeplabv3_resnet101(aux, 
num_classes=21, pretrain_backbone=False):\n    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'\n    # 'deeplabv3_resnet101_coco': 'https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth'\n    backbone = resnet101(replace_stride_with_dilation=[False, True, True])\n\n    if pretrain_backbone:\n        # 载入resnet101 backbone预训练权重\n        backbone.load_state_dict(torch.load(\"resnet101.pth\", map_location='cpu'))\n\n    out_inplanes = 2048\n    aux_inplanes = 1024\n\n    return_layers = {'layer4': 'out'}\n    if aux:\n        return_layers['layer3'] = 'aux'\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    aux_classifier = None\n    # why using aux: https://github.com/pytorch/vision/issues/4292\n    if aux:\n        aux_classifier = FCNHead(aux_inplanes, num_classes)\n\n    classifier = DeepLabHead(out_inplanes, num_classes)\n\n    model = DeepLabV3(backbone, classifier, aux_classifier)\n\n    return model\n\n\ndef deeplabv3_mobilenetv3_large(aux, num_classes=21, pretrain_backbone=False):\n    # 'mobilenetv3_large_imagenet': 'https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth'\n    # 'depv3_mobilenetv3_large_coco': \"https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth\"\n    backbone = mobilenet_v3_large(dilated=True)\n\n    if pretrain_backbone:\n        # 载入mobilenetv3 large backbone预训练权重\n        backbone.load_state_dict(torch.load(\"mobilenet_v3_large.pth\", map_location='cpu'))\n\n    backbone = backbone.features\n\n    # Gather the indices of blocks which are strided. 
These are the locations of C1, ..., Cn-1 blocks.\n    # The first and last blocks are always included because they are the C0 (conv1) and Cn.\n    stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, \"is_strided\", False)] + [len(backbone) - 1]\n    out_pos = stage_indices[-1]  # use C5 which has output_stride = 16\n    out_inplanes = backbone[out_pos].out_channels\n    aux_pos = stage_indices[-4]  # use C2 here which has output_stride = 8\n    aux_inplanes = backbone[aux_pos].out_channels\n    return_layers = {str(out_pos): \"out\"}\n    if aux:\n        return_layers[str(aux_pos)] = \"aux\"\n\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    aux_classifier = None\n    # why using aux: https://github.com/pytorch/vision/issues/4292\n    if aux:\n        aux_classifier = FCNHead(aux_inplanes, num_classes)\n\n    classifier = DeepLabHead(out_inplanes, num_classes)\n\n    model = DeepLabV3(backbone, classifier, aux_classifier)\n\n    return model\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/src/mobilenet_backbone.py",
    "content": "from typing import Callable, List, Optional\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom functools import partial\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNActivation(nn.Sequential):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None,\n                 dilation: int = 1):\n        padding = (kernel_size - 1) // 2 * dilation\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.ReLU6\n        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,\n                                                         out_channels=out_planes,\n                                                         kernel_size=kernel_size,\n                                                         stride=stride,\n                                                         dilation=dilation,\n                                                         padding=padding,\n                                                         groups=groups,\n                                            
             bias=False),\n                                               norm_layer(out_planes),\n                                               activation_layer(inplace=True))\n        self.out_channels = out_planes\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self, input_c: int, squeeze_factor: int = 4):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)\n        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)\n        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))\n        scale = self.fc1(scale)\n        scale = F.relu(scale, inplace=True)\n        scale = self.fc2(scale)\n        scale = F.hardsigmoid(scale, inplace=True)\n        return scale * x\n\n\nclass InvertedResidualConfig:\n    def __init__(self,\n                 input_c: int,\n                 kernel: int,\n                 expanded_c: int,\n                 out_c: int,\n                 use_se: bool,\n                 activation: str,\n                 stride: int,\n                 dilation: int,\n                 width_multi: float):\n        self.input_c = self.adjust_channels(input_c, width_multi)\n        self.kernel = kernel\n        self.expanded_c = self.adjust_channels(expanded_c, width_multi)\n        self.out_c = self.adjust_channels(out_c, width_multi)\n        self.use_se = use_se\n        self.use_hs = activation == \"HS\"  # whether using h-swish activation\n        self.stride = stride\n        self.dilation = dilation\n\n    @staticmethod\n    def adjust_channels(channels: int, width_multi: float):\n        return _make_divisible(channels * width_multi, 8)\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self,\n                 cnf: InvertedResidualConfig,\n                 norm_layer: Callable[..., nn.Module]):\n        super(InvertedResidual, self).__init__()\n\n        if cnf.stride not in 
[1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)\n\n        layers: List[nn.Module] = []\n        activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU\n\n        # expand\n        if cnf.expanded_c != cnf.input_c:\n            layers.append(ConvBNActivation(cnf.input_c,\n                                           cnf.expanded_c,\n                                           kernel_size=1,\n                                           norm_layer=norm_layer,\n                                           activation_layer=activation_layer))\n\n        # depthwise\n        stride = 1 if cnf.dilation > 1 else cnf.stride\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.expanded_c,\n                                       kernel_size=cnf.kernel,\n                                       stride=stride,\n                                       dilation=cnf.dilation,\n                                       groups=cnf.expanded_c,\n                                       norm_layer=norm_layer,\n                                       activation_layer=activation_layer))\n\n        if cnf.use_se:\n            layers.append(SqueezeExcitation(cnf.expanded_c))\n\n        # project\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.out_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Identity))\n\n        self.block = nn.Sequential(*layers)\n        self.out_channels = cnf.out_c\n        self.is_strided = cnf.stride > 1\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.block(x)\n        if self.use_res_connect:\n            result += x\n\n        return result\n\n\nclass MobileNetV3(nn.Module):\n    def __init__(self,\n                 
inverted_residual_setting: List[InvertedResidualConfig],\n                 last_channel: int,\n                 num_classes: int = 1000,\n                 block: Optional[Callable[..., nn.Module]] = None,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None):\n        super(MobileNetV3, self).__init__()\n\n        if not inverted_residual_setting:\n            raise ValueError(\"The inverted_residual_setting should not be empty.\")\n        elif not (isinstance(inverted_residual_setting, List) and\n                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):\n            raise TypeError(\"The inverted_residual_setting should be List[InvertedResidualConfig]\")\n\n        if block is None:\n            block = InvertedResidual\n\n        if norm_layer is None:\n            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)\n\n        layers: List[nn.Module] = []\n\n        # building first layer\n        firstconv_output_c = inverted_residual_setting[0].input_c\n        layers.append(ConvBNActivation(3,\n                                       firstconv_output_c,\n                                       kernel_size=3,\n                                       stride=2,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        # building inverted residual blocks\n        for cnf in inverted_residual_setting:\n            layers.append(block(cnf, norm_layer))\n\n        # building last several layers\n        lastconv_input_c = inverted_residual_setting[-1].out_c\n        lastconv_output_c = 6 * lastconv_input_c\n        layers.append(ConvBNActivation(lastconv_input_c,\n                                       lastconv_output_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n      
  self.features = nn.Sequential(*layers)\n        self.avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),\n                                        nn.Hardswish(inplace=True),\n                                        nn.Dropout(p=0.2, inplace=True),\n                                        nn.Linear(last_channel, num_classes))\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef mobilenet_v3_large(num_classes: int = 1000,\n                       reduced_tail: bool = False,\n                       dilated: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a large MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. 
It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n        dilated: whether using dilated conv\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n    dilation = 2 if dilated else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation\n        bneck_conf(16, 3, 16, 16, False, \"RE\", 1, 1),\n        bneck_conf(16, 3, 64, 24, False, \"RE\", 2, 1),  # C1\n        bneck_conf(24, 3, 72, 24, False, \"RE\", 1, 1),\n        bneck_conf(24, 5, 72, 40, True, \"RE\", 2, 1),  # C2\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1, 1),\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1, 1),\n        bneck_conf(40, 3, 240, 80, False, \"HS\", 2, 1),  # C3\n        bneck_conf(80, 3, 200, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 480, 112, True, \"HS\", 1, 1),\n        bneck_conf(112, 3, 672, 112, True, \"HS\", 1, 1),\n        bneck_conf(112, 5, 672, 160 // reduce_divider, True, \"HS\", 2, dilation),  # C4\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1, dilation),\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1, dilation),\n    ]\n    last_channel = adjust_channels(1280 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n\n\ndef mobilenet_v3_small(num_classes: int = 1000,\n                       reduced_tail: bool = False,\n                       dilated: bool = 
False) -> MobileNetV3:\n    \"\"\"\n    Constructs a small MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n        dilated: whether using dilated conv\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n    dilation = 2 if dilated else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation\n        bneck_conf(16, 3, 16, 16, True, \"RE\", 2, 1),  # C1\n        bneck_conf(16, 3, 72, 24, False, \"RE\", 2, 1),  # C2\n        bneck_conf(24, 3, 88, 24, False, \"RE\", 1, 1),\n        bneck_conf(24, 5, 96, 40, True, \"HS\", 2, 1),  # C3\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1, 1),\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1, 1),\n        bneck_conf(40, 5, 120, 48, True, \"HS\", 1, 1),\n        bneck_conf(48, 5, 144, 48, True, \"HS\", 1, 1),\n        bneck_conf(48, 5, 288, 96 // reduce_divider, True, \"HS\", 2, dilation),  # C4\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1, dilation),\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1, dilation)\n    ]\n    last_channel = adjust_channels(1024 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n 
                      num_classes=num_classes)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/src/resnet_backbone.py",
    "content": "import torch\nimport torch.nn as nn\n\n\ndef conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=dilation, groups=groups, bias=False, dilation=dilation)\n\n\ndef conv1x1(in_planes, out_planes, stride=1):\n    \"\"\"1x1 convolution\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)\n\n\nclass Bottleneck(nn.Module):\n    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)\n    # while original implementation places the stride at the first 1x1 convolution(self.conv1)\n    # according to \"Deep residual learning for image recognition\"https://arxiv.org/abs/1512.03385.\n    # This variant is also known as ResNet V1.5 and improves accuracy according to\n    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.\n\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,\n                 base_width=64, dilation=1, norm_layer=None):\n        super(Bottleneck, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        width = int(planes * (base_width / 64.)) * groups\n        # Both self.conv2 and self.downsample layers downsample the input when stride != 1\n        self.conv1 = conv1x1(inplanes, width)\n        self.bn1 = norm_layer(width)\n        self.conv2 = conv3x3(width, width, stride, groups, dilation)\n        self.bn2 = norm_layer(width)\n        self.conv3 = conv1x1(width, planes * self.expansion)\n        self.bn3 = norm_layer(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        identity = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = 
self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,\n                 groups=1, width_per_group=64, replace_stride_with_dilation=None,\n                 norm_layer=None):\n        super(ResNet, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        self._norm_layer = norm_layer\n\n        self.inplanes = 64\n        self.dilation = 1\n        if replace_stride_with_dilation is None:\n            # each element in the tuple indicates if we should replace\n            # the 2x2 stride with a dilated convolution instead\n            replace_stride_with_dilation = [False, False, False]\n        if len(replace_stride_with_dilation) != 3:\n            raise ValueError(\"replace_stride_with_dilation should be None \"\n                             \"or a 3-element tuple, got {}\".format(replace_stride_with_dilation))\n        self.groups = groups\n        self.base_width = width_per_group\n        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,\n                               bias=False)\n        self.bn1 = norm_layer(self.inplanes)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,\n                                       dilate=replace_stride_with_dilation[0])\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,\n                                       dilate=replace_stride_with_dilation[1])\n    
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,\n                                       dilate=replace_stride_with_dilation[2])\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n        # Zero-initialize the last BN in each residual branch,\n        # so that the residual branch starts with zeros, and each residual block behaves like an identity.\n        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677\n        if zero_init_residual:\n            for m in self.modules():\n                if isinstance(m, Bottleneck):\n                    nn.init.constant_(m.bn3.weight, 0)\n\n    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):\n        norm_layer = self._norm_layer\n        downsample = None\n        previous_dilation = self.dilation\n        if dilate:\n            self.dilation *= stride\n            stride = 1\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                conv1x1(self.inplanes, planes * block.expansion, stride),\n                norm_layer(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,\n                            self.base_width, previous_dilation, norm_layer))\n        self.inplanes = planes * block.expansion\n        for _ in range(1, blocks):\n            layers.append(block(self.inplanes, planes, groups=self.groups,\n                                base_width=self.base_width, dilation=self.dilation,\n                            
    norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def _forward_impl(self, x):\n        # See note [TorchScript super()]\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.fc(x)\n\n        return x\n\n    def forward(self, x):\n        return self._forward_impl(x)\n\n\ndef _resnet(block, layers, **kwargs):\n    model = ResNet(block, layers, **kwargs)\n    return model\n\n\ndef resnet50(**kwargs):\n    r\"\"\"ResNet-50 model from\n    `\"Deep Residual Learning for Image Recognition\" <https://arxiv.org/pdf/1512.03385.pdf>`_\n\n    Args:\n        **kwargs: keyword arguments forwarded unchanged to the ResNet constructor\n            (e.g. replace_stride_with_dilation); no pretrained weights are loaded here\n    \"\"\"\n    return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)\n\n\ndef resnet101(**kwargs):\n    r\"\"\"ResNet-101 model from\n    `\"Deep Residual Learning for Image Recognition\" <https://arxiv.org/pdf/1512.03385.pdf>`_\n\n    Args:\n        **kwargs: keyword arguments forwarded unchanged to the ResNet constructor\n            (e.g. replace_stride_with_dilation); no pretrained weights are loaded here\n    \"\"\"\n    return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/train.py",
    "content": "import os\nimport time\nimport datetime\n\nimport torch\n\nfrom src import deeplabv3_resnet50\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(aux, num_classes, pretrain=True):\n    model = deeplabv3_resnet50(aux=aux, num_classes=num_classes)\n\n    if pretrain:\n        weights_dict = torch.load(\"./deeplabv3_resnet50_coco.pth\", map_location='cpu')\n\n        if num_classes != 21:\n            # 官方提供的预训练权重是21类(包括背景)\n            # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n            for k in list(weights_dict.keys()):\n                if \"classifier.4\" in k:\n                    del weights_dict[k]\n\n        missing_keys, unexpected_keys 
= model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n            print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    batch_size = args.batch_size\n    # segmentation num_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               num_workers=num_workers,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                 
            collate_fn=val_dataset.collate_fn)\n\n    model = create_model(aux=args.aux, num_classes=num_classes)\n    model.to(device)\n\n    params_to_optimize = [\n        {\"params\": [p for p in model.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model.classifier.parameters() if p.requires_grad]}\n    ]\n\n    if args.aux:\n        params = [p for p in model.aux_classifier.parameters() if p.requires_grad]\n        params_to_optimize.append({\"params\": params, \"lr\": args.lr * 10})\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay\n    )\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)\n\n    # import matplotlib.pyplot as plt\n    # lr_list = []\n    # for _ in range(args.epochs):\n    #     for _ in range(len(train_loader)):\n    #         lr_scheduler.step()\n    #         lr = optimizer.param_groups[0][\"lr\"]\n    #         lr_list.append(lr)\n    # plt.plot(range(len(lr_list)), lr_list)\n    # plt.show()\n\n    if args.resume:\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n        
val_info = str(confmat)\n        print(val_info)\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 记录每个epoch对应的train_loss、lr以及验证集各指标\n            train_info = f\"[epoch: {epoch}]\\n\" \\\n                         f\"train_loss: {mean_loss:.4f}\\n\" \\\n                         f\"lr: {lr:.6f}\\n\"\n            f.write(train_info + val_info + \"\\n\\n\")\n\n        save_file = {\"model\": model.state_dict(),\n                     \"optimizer\": optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     \"epoch\": epoch,\n                     \"args\": args}\n        if args.amp:\n            save_file[\"scaler\"] = scaler.state_dict()\n        torch.save(save_file, \"save_weights/model_{}.pth\".format(epoch))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print(\"training time {}\".format(total_time_str))\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch deeplabv3 training\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxilier loss\")\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument(\"-b\", \"--batch-size\", default=4, type=int)\n    parser.add_argument(\"--epochs\", default=30, type=int, metavar=\"N\",\n                        help=\"number of total epochs to train\")\n\n    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay 
(default: 1e-4)',\n                        dest='weight_decay')\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n                        help='start epoch')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nfrom src import deeplabv3_resnet50\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(aux, num_classes):\n    model = deeplabv3_resnet50(aux=aux, num_classes=num_classes)\n    weights_dict = torch.load(\"./deeplabv3_resnet50_coco.pth\", map_location='cpu')\n\n    if num_classes != 21:\n        # 官方提供的预训练权重是21类(包括背景)\n        # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n        for k in list(weights_dict.keys()):\n            if \"classifier.4\" in k:\n                del weights_dict[k]\n\n    missing_keys, unexpected_keys = 
model.load_state_dict(weights_dict, strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    train_data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_size=args.batch_size,\n        sampler=train_sampler, 
num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn, drop_last=True)\n\n    val_data_loader = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + 20 classes\n    model = create_model(aux=args.aux, num_classes=num_classes)\n    model.to(device)\n\n    if args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params_to_optimize = [\n        {\"params\": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},\n    ]\n    if args.aux:\n        params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad]\n        params_to_optimize.append({\"params\": params, \"lr\": args.lr * 10})\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        
optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        return\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                train_info = f\"[epoch: {epoch}]\\n\" \\\n                             f\"train_loss: {mean_loss:.4f}\\n\" \\\n                             f\"lr: {lr:.6f}\\n\"\n                f.write(train_info + val_info + \"\\n\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_file = {'model': model_without_ddp.state_dict(),\n                         'optimizer': optimizer.state_dict(),\n                         'lr_scheduler': lr_scheduler.state_dict(),\n                         'args': args,\n                         'epoch': epoch}\n            if args.amp:\n                save_file[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_file,\n                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))\n\n    total_time = 
time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data/', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxilier loss\")\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 是否使用同步BN(在多个GPU之间同步)，默认不开启，开启后训练速度会变慢\n    parser.add_argument('--sync_bn', type=bool, default=False, help='whether using SyncBatchNorm')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 训练学习率，这里默认设置成0.0001，如果效果不好可以尝试加大学习率\n    parser.add_argument('--lr', default=0.0001, type=float,\n                        help='initial learning rate')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练过程打印信息的频率\n    
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 分布式进程数\n    parser.add_argument('--world-size', default=1, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/train_utils/__init__.py",
    "content": "from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport time\nimport torch\nimport torch.distributed as dist\n\nimport errno\nimport os\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\nclass ConfusionMatrix(object):\n    def __init__(self, num_classes):\n        self.num_classes = num_classes\n        self.mat = None\n\n    def update(self, a, 
b):\n        n = self.num_classes\n        if self.mat is None:\n            # 创建混淆矩阵\n            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)\n        with torch.no_grad():\n            # 寻找GT中为目标的像素索引\n            k = (a >= 0) & (a < n)\n            # 统计像素真实类别a[k]被预测成类别b[k]的个数(这里的做法很巧妙)\n            inds = n * a[k].to(torch.int64) + b[k]\n            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)\n\n    def reset(self):\n        if self.mat is not None:\n            self.mat.zero_()\n\n    def compute(self):\n        h = self.mat.float()\n        # 计算全局预测准确率(混淆矩阵的对角线为预测正确的个数)\n        acc_global = torch.diag(h).sum() / h.sum()\n        # 计算每个类别的准确率\n        acc = torch.diag(h) / h.sum(1)\n        # 计算每个类别预测与真实目标的iou\n        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))\n        return acc_global, acc, iu\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.mat)\n\n    def __str__(self):\n        acc_global, acc, iu = self.compute()\n        return (\n            'global correct: {:.1f}\\n'\n            'average row correct: {}\\n'\n            'IoU: {}\\n'\n            'mean IoU: {:.1f}').format(\n                acc_global.item() * 100,\n                ['{:.1f}'.format(i) for i in (acc * 100).tolist()],\n                ['{:.1f}'.format(i) for i in (iu * 100).tolist()],\n                iu.mean().item() * 100)\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            
self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = ''\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}',\n                'max mem: {memory:.0f}'\n            ])\n        else:\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}'\n            ])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0:\n                eta_seconds = 
iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))\n                if torch.cuda.is_available():\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time),\n                        memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {}'.format(header, total_time_str))\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n 
       torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    elif hasattr(args, \"rank\"):\n        pass\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/train_utils/train_and_eval.py",
    "content": "import torch\nfrom torch import nn\nimport train_utils.distributed_utils as utils\n\n\ndef criterion(inputs, target):\n    losses = {}\n    for name, x in inputs.items():\n        # 忽略target中值为255的像素，255的像素是目标边缘或者padding填充\n        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)\n\n    if len(losses) == 1:\n        return losses['out']\n\n    return losses['out'] + 0.5 * losses['aux']\n\n\ndef evaluate(model, data_loader, device, num_classes):\n    model.eval()\n    confmat = utils.ConfusionMatrix(num_classes)\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = 'Test:'\n    with torch.no_grad():\n        for image, target in metric_logger.log_every(data_loader, 100, header):\n            image, target = image.to(device), target.to(device)\n            output = model(image)\n            output = output['out']\n\n            confmat.update(target.flatten(), output.argmax(1).flatten())\n\n        confmat.reduce_from_all_processes()\n\n    return confmat\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    for image, target in metric_logger.log_every(data_loader, print_freq, header):\n        image, target = image.to(device), target.to(device)\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            output = model(image)\n            loss = criterion(output, target)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(loss).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            loss.backward()\n            optimizer.step()\n\n        lr_scheduler.step()\n\n        lr = optimizer.param_groups[0][\"lr\"]\n        
metric_logger.update(loss=loss.item(), lr=lr)\n\n    return metric_logger.meters[\"loss\"].global_avg, lr\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            # warmup后lr倍率因子从1 -> 0\n            # 参考deeplab_v2: Learning rate policy\n            return (1 - (x - warmup_epochs * num_step) / ((epochs - warmup_epochs) * num_step)) ** 0.9\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/transforms.py",
    "content": "import numpy as np\nimport random\n\nimport torch\nfrom torchvision import transforms as T\nfrom torchvision.transforms import functional as F\n\n\ndef pad_if_smaller(img, size, fill=0):\n    # 如果图像最小边长小于给定size，则用数值fill进行padding\n    min_size = min(img.size)\n    if min_size < size:\n        ow, oh = img.size\n        padh = size - oh if oh < size else 0\n        padw = size - ow if ow < size else 0\n        img = F.pad(img, (0, 0, padw, padh), fill=fill)\n    return img\n\n\nclass Compose(object):\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass RandomResize(object):\n    def __init__(self, min_size, max_size=None):\n        self.min_size = min_size\n        if max_size is None:\n            max_size = min_size\n        self.max_size = max_size\n\n    def __call__(self, image, target):\n        size = random.randint(self.min_size, self.max_size)\n        # 这里size传入的是int类型，所以是将图像的最小边长缩放到size大小\n        image = F.resize(image, size)\n        # 这里的interpolation注意下，在torchvision(0.9.0)以后才有InterpolationMode.NEAREST\n        # 如果是之前的版本需要使用PIL.Image.NEAREST\n        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    def __init__(self, flip_prob):\n        self.flip_prob = flip_prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.hflip(image)\n            target = F.hflip(target)\n        return image, target\n\n\nclass RandomCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = pad_if_smaller(image, self.size)\n        target = pad_if_smaller(target, self.size, fill=255)\n        crop_params = T.RandomCrop.get_params(image, (self.size, 
self.size))\n        image = F.crop(image, *crop_params)\n        target = F.crop(target, *crop_params)\n        return image, target\n\n\nclass CenterCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = F.center_crop(image, self.size)\n        target = F.center_crop(target, self.size)\n        return image, target\n\n\nclass ToTensor(object):\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        target = torch.as_tensor(np.array(target), dtype=torch.int64)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean, std):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n"
  },
  {
    "path": "pytorch_segmentation/deeplab_v3/validation.py",
    "content": "import os\nimport torch\n\nfrom src import deeplabv3_resnet50\nfrom train_utils import evaluate\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    assert os.path.exists(args.weights), f\"weights {args.weights} not found.\"\n\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=SegmentationPresetEval(520),\n                                  txt_name=\"val.txt\")\n\n    num_workers = 8\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = deeplabv3_resnet50(aux=args.aux, num_classes=num_classes)\n    model.load_state_dict(torch.load(args.weights, map_location=device)['model'])\n    model.to(device)\n\n    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n    print(confmat)\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch deeplabv3 validation\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    
parser.add_argument(\"--weights\", default=\"./save_weights/model_29.pth\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxilier loss\")\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/README.md",
    "content": "# FCN(Fully Convolutional Networks for Semantic Segmentation)\n\n## 该项目主要是来自pytorch官方torchvision模块中的源码\n* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10\n* Ubuntu或Centos(Windows暂不支持多GPU训练)\n* 最好使用GPU训练\n* 详细环境配置见```requirements.txt```\n\n## 文件结构：\n```\n  ├── src: 模型的backbone以及FCN的搭建\n  ├── train_utils: 训练、验证以及多GPU训练相关模块\n  ├── my_dataset.py: 自定义dataset用于读取VOC数据集\n  ├── train.py: 以fcn_resnet50(这里使用了Dilated/Atrous Convolution)进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件\n  └── pascal_voc_classes.json: pascal_voc标签文件\n```\n\n## 预训练权重下载地址：\n* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)\n* fcn_resnet50: https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth\n* fcn_resnet101: https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth\n* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```fcn_resnet50_coco.pth```文件，\n  不是```fcn_resnet50_coco-1167a1af.pth```\n \n \n## 数据集，本例程使用的是PASCAL VOC2012数据集\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)\n* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```\n\n## 注意事项\n* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**\n* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。\n* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--aux'、'--data-path'和'--weights'即可，其他代码尽量不要改动\n\n## 如果对FCN原理不是很理解可参考我的bilibili\n* 
https://www.bilibili.com/video/BV1J3411C7zd\n* https://www.bilibili.com/video/BV1ev411u7TX\n\n## 进一步了解该项目，以及对FCN代码的分析可参考我的bilibili\n* https://www.bilibili.com/video/BV19q4y1971Q\n\n## Pytorch官方实现的FCN网络框架图\n![torch_fcn](torch_fcn.png)\n\n"
  },
  {
    "path": "pytorch_segmentation/fcn/get_palette.py",
    "content": "import json\nimport numpy as np\nfrom PIL import Image\n\n# 读取mask标签\ntarget = Image.open(\"./2007_001288.png\")\n# 获取调色板\npalette = target.getpalette()\npalette = np.reshape(palette, (-1, 3)).tolist()\n# 转换成字典形式\npd = dict((i, color) for i, color in enumerate(palette))\n\njson_str = json.dumps(pd)\nwith open(\"palette.json\", \"w\") as f:\n    f.write(json_str)\n\n# target = np.array(target)\n# print(target)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/my_dataset.py",
    "content": "import os\n\nimport torch.utils.data as data\nfrom PIL import Image\n\n\nclass VOCSegmentation(data.Dataset):\n    def __init__(self, voc_root, year=\"2012\", transforms=None, txt_name: str = \"train.txt\"):\n        super(VOCSegmentation, self).__init__()\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        assert os.path.exists(root), \"path '{}' does not exist.\".format(root)\n        image_dir = os.path.join(root, 'JPEGImages')\n        mask_dir = os.path.join(root, 'SegmentationClass')\n\n        txt_path = os.path.join(root, \"ImageSets\", \"Segmentation\", txt_name)\n        assert os.path.exists(txt_path), \"file '{}' does not exist.\".format(txt_path)\n        with open(os.path.join(txt_path), \"r\") as f:\n            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]\n\n        self.images = [os.path.join(image_dir, x + \".jpg\") for x in file_names]\n        self.masks = [os.path.join(mask_dir, x + \".png\") for x in file_names]\n        assert (len(self.images) == len(self.masks))\n        self.transforms = transforms\n\n    def __getitem__(self, index):\n        \"\"\"\n        Args:\n            index (int): Index\n\n        Returns:\n            tuple: (image, target) where target is the image segmentation.\n        \"\"\"\n        img = Image.open(self.images[index]).convert('RGB')\n        target = Image.open(self.masks[index])\n\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.images)\n\n    @staticmethod\n    def collate_fn(batch):\n        images, targets = list(zip(*batch))\n        batched_imgs = cat_list(images, fill_value=0)\n        batched_targets = cat_list(targets, fill_value=255)\n        return batched_imgs, batched_targets\n\n\ndef cat_list(images, fill_value=0):\n  
  # 计算该batch数据中，channel, h, w的最大值\n    max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))\n    batch_shape = (len(images),) + max_size\n    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)\n    for img, pad_img in zip(images, batched_imgs):\n        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)\n    return batched_imgs\n\n\n# dataset = VOCSegmentation(voc_root=\"/data/\", transforms=get_transform(train=True))\n# d1 = dataset[0]\n# print(d1)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/palette.json",
    "content": "{\"0\": [0, 0, 0], \"1\": [128, 0, 0], \"2\": [0, 128, 0], \"3\": [128, 128, 0], \"4\": [0, 0, 128], \"5\": [128, 0, 128], \"6\": [0, 128, 128], \"7\": [128, 128, 128], \"8\": [64, 0, 0], \"9\": [192, 0, 0], \"10\": [64, 128, 0], \"11\": [192, 128, 0], \"12\": [64, 0, 128], \"13\": [192, 0, 128], \"14\": [64, 128, 128], \"15\": [192, 128, 128], \"16\": [0, 64, 0], \"17\": [128, 64, 0], \"18\": [0, 192, 0], \"19\": [128, 192, 0], \"20\": [0, 64, 128], \"21\": [128, 64, 128], \"22\": [0, 192, 128], \"23\": [128, 192, 128], \"24\": [64, 64, 0], \"25\": [192, 64, 0], \"26\": [64, 192, 0], \"27\": [192, 192, 0], \"28\": [64, 64, 128], \"29\": [192, 64, 128], \"30\": [64, 192, 128], \"31\": [192, 192, 128], \"32\": [0, 0, 64], \"33\": [128, 0, 64], \"34\": [0, 128, 64], \"35\": [128, 128, 64], \"36\": [0, 0, 192], \"37\": [128, 0, 192], \"38\": [0, 128, 192], \"39\": [128, 128, 192], \"40\": [64, 0, 64], \"41\": [192, 0, 64], \"42\": [64, 128, 64], \"43\": [192, 128, 64], \"44\": [64, 0, 192], \"45\": [192, 0, 192], \"46\": [64, 128, 192], \"47\": [192, 128, 192], \"48\": [0, 64, 64], \"49\": [128, 64, 64], \"50\": [0, 192, 64], \"51\": [128, 192, 64], \"52\": [0, 64, 192], \"53\": [128, 64, 192], \"54\": [0, 192, 192], \"55\": [128, 192, 192], \"56\": [64, 64, 64], \"57\": [192, 64, 64], \"58\": [64, 192, 64], \"59\": [192, 192, 64], \"60\": [64, 64, 192], \"61\": [192, 64, 192], \"62\": [64, 192, 192], \"63\": [192, 192, 192], \"64\": [32, 0, 0], \"65\": [160, 0, 0], \"66\": [32, 128, 0], \"67\": [160, 128, 0], \"68\": [32, 0, 128], \"69\": [160, 0, 128], \"70\": [32, 128, 128], \"71\": [160, 128, 128], \"72\": [96, 0, 0], \"73\": [224, 0, 0], \"74\": [96, 128, 0], \"75\": [224, 128, 0], \"76\": [96, 0, 128], \"77\": [224, 0, 128], \"78\": [96, 128, 128], \"79\": [224, 128, 128], \"80\": [32, 64, 0], \"81\": [160, 64, 0], \"82\": [32, 192, 0], \"83\": [160, 192, 0], \"84\": [32, 64, 128], \"85\": [160, 64, 128], \"86\": [32, 192, 128], \"87\": [160, 
192, 128], \"88\": [96, 64, 0], \"89\": [224, 64, 0], \"90\": [96, 192, 0], \"91\": [224, 192, 0], \"92\": [96, 64, 128], \"93\": [224, 64, 128], \"94\": [96, 192, 128], \"95\": [224, 192, 128], \"96\": [32, 0, 64], \"97\": [160, 0, 64], \"98\": [32, 128, 64], \"99\": [160, 128, 64], \"100\": [32, 0, 192], \"101\": [160, 0, 192], \"102\": [32, 128, 192], \"103\": [160, 128, 192], \"104\": [96, 0, 64], \"105\": [224, 0, 64], \"106\": [96, 128, 64], \"107\": [224, 128, 64], \"108\": [96, 0, 192], \"109\": [224, 0, 192], \"110\": [96, 128, 192], \"111\": [224, 128, 192], \"112\": [32, 64, 64], \"113\": [160, 64, 64], \"114\": [32, 192, 64], \"115\": [160, 192, 64], \"116\": [32, 64, 192], \"117\": [160, 64, 192], \"118\": [32, 192, 192], \"119\": [160, 192, 192], \"120\": [96, 64, 64], \"121\": [224, 64, 64], \"122\": [96, 192, 64], \"123\": [224, 192, 64], \"124\": [96, 64, 192], \"125\": [224, 64, 192], \"126\": [96, 192, 192], \"127\": [224, 192, 192], \"128\": [0, 32, 0], \"129\": [128, 32, 0], \"130\": [0, 160, 0], \"131\": [128, 160, 0], \"132\": [0, 32, 128], \"133\": [128, 32, 128], \"134\": [0, 160, 128], \"135\": [128, 160, 128], \"136\": [64, 32, 0], \"137\": [192, 32, 0], \"138\": [64, 160, 0], \"139\": [192, 160, 0], \"140\": [64, 32, 128], \"141\": [192, 32, 128], \"142\": [64, 160, 128], \"143\": [192, 160, 128], \"144\": [0, 96, 0], \"145\": [128, 96, 0], \"146\": [0, 224, 0], \"147\": [128, 224, 0], \"148\": [0, 96, 128], \"149\": [128, 96, 128], \"150\": [0, 224, 128], \"151\": [128, 224, 128], \"152\": [64, 96, 0], \"153\": [192, 96, 0], \"154\": [64, 224, 0], \"155\": [192, 224, 0], \"156\": [64, 96, 128], \"157\": [192, 96, 128], \"158\": [64, 224, 128], \"159\": [192, 224, 128], \"160\": [0, 32, 64], \"161\": [128, 32, 64], \"162\": [0, 160, 64], \"163\": [128, 160, 64], \"164\": [0, 32, 192], \"165\": [128, 32, 192], \"166\": [0, 160, 192], \"167\": [128, 160, 192], \"168\": [64, 32, 64], \"169\": [192, 32, 64], \"170\": [64, 160, 64], \"171\": 
[192, 160, 64], \"172\": [64, 32, 192], \"173\": [192, 32, 192], \"174\": [64, 160, 192], \"175\": [192, 160, 192], \"176\": [0, 96, 64], \"177\": [128, 96, 64], \"178\": [0, 224, 64], \"179\": [128, 224, 64], \"180\": [0, 96, 192], \"181\": [128, 96, 192], \"182\": [0, 224, 192], \"183\": [128, 224, 192], \"184\": [64, 96, 64], \"185\": [192, 96, 64], \"186\": [64, 224, 64], \"187\": [192, 224, 64], \"188\": [64, 96, 192], \"189\": [192, 96, 192], \"190\": [64, 224, 192], \"191\": [192, 224, 192], \"192\": [32, 32, 0], \"193\": [160, 32, 0], \"194\": [32, 160, 0], \"195\": [160, 160, 0], \"196\": [32, 32, 128], \"197\": [160, 32, 128], \"198\": [32, 160, 128], \"199\": [160, 160, 128], \"200\": [96, 32, 0], \"201\": [224, 32, 0], \"202\": [96, 160, 0], \"203\": [224, 160, 0], \"204\": [96, 32, 128], \"205\": [224, 32, 128], \"206\": [96, 160, 128], \"207\": [224, 160, 128], \"208\": [32, 96, 0], \"209\": [160, 96, 0], \"210\": [32, 224, 0], \"211\": [160, 224, 0], \"212\": [32, 96, 128], \"213\": [160, 96, 128], \"214\": [32, 224, 128], \"215\": [160, 224, 128], \"216\": [96, 96, 0], \"217\": [224, 96, 0], \"218\": [96, 224, 0], \"219\": [224, 224, 0], \"220\": [96, 96, 128], \"221\": [224, 96, 128], \"222\": [96, 224, 128], \"223\": [224, 224, 128], \"224\": [32, 32, 64], \"225\": [160, 32, 64], \"226\": [32, 160, 64], \"227\": [160, 160, 64], \"228\": [32, 32, 192], \"229\": [160, 32, 192], \"230\": [32, 160, 192], \"231\": [160, 160, 192], \"232\": [96, 32, 64], \"233\": [224, 32, 64], \"234\": [96, 160, 64], \"235\": [224, 160, 64], \"236\": [96, 32, 192], \"237\": [224, 32, 192], \"238\": [96, 160, 192], \"239\": [224, 160, 192], \"240\": [32, 96, 64], \"241\": [160, 96, 64], \"242\": [32, 224, 64], \"243\": [160, 224, 64], \"244\": [32, 96, 192], \"245\": [160, 96, 192], \"246\": [32, 224, 192], \"247\": [160, 224, 192], \"248\": [96, 96, 64], \"249\": [224, 96, 64], \"250\": [96, 224, 64], \"251\": [224, 224, 64], \"252\": [96, 96, 192], \"253\": [224, 96, 
192], \"254\": [96, 224, 192], \"255\": [224, 224, 192]}"
  },
  {
    "path": "pytorch_segmentation/fcn/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 1,\n    \"bicycle\": 2,\n    \"bird\": 3,\n    \"boat\": 4,\n    \"bottle\": 5,\n    \"bus\": 6,\n    \"car\": 7,\n    \"cat\": 8,\n    \"chair\": 9,\n    \"cow\": 10,\n    \"diningtable\": 11,\n    \"dog\": 12,\n    \"horse\": 13,\n    \"motorbike\": 14,\n    \"person\": 15,\n    \"pottedplant\": 16,\n    \"sheep\": 17,\n    \"sofa\": 18,\n    \"train\": 19,\n    \"tvmonitor\": 20\n}"
  },
  {
    "path": "pytorch_segmentation/fcn/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nfrom torchvision import transforms\nimport numpy as np\nfrom PIL import Image\n\nfrom src import fcn_resnet50\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    aux = False  # inference time not need aux_classifier\n    classes = 20\n    weights_path = \"./save_weights/model_29.pth\"\n    img_path = \"./test.jpg\"\n    palette_path = \"./palette.json\"\n    assert os.path.exists(weights_path), f\"weights {weights_path} not found.\"\n    assert os.path.exists(img_path), f\"image {img_path} not found.\"\n    assert os.path.exists(palette_path), f\"palette {palette_path} not found.\"\n    with open(palette_path, \"rb\") as f:\n        pallette_dict = json.load(f)\n        pallette = []\n        for v in pallette_dict.values():\n            pallette += v\n\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = fcn_resnet50(aux=aux, num_classes=classes+1)\n\n    # delete weights about aux_classifier\n    weights_dict = torch.load(weights_path, map_location='cpu')['model']\n    for k in list(weights_dict.keys()):\n        if \"aux\" in k:\n            del weights_dict[k]\n\n    # load weights\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # load image\n    original_img = Image.open(img_path)\n\n    # from pil image to tensor and normalize\n    data_transform = transforms.Compose([transforms.Resize(520),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                                                              std=(0.229, 0.224, 0.225))])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    
model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init model\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        output = model(img.to(device))\n        t_end = time_synchronized()\n        print(\"inference time: {}\".format(t_end - t_start))\n\n        prediction = output['out'].argmax(1).squeeze(0)\n        prediction = prediction.to(\"cpu\").numpy().astype(np.uint8)\n        mask = Image.fromarray(prediction)\n        mask.putpalette(pallette)\n        mask.save(\"test_result.png\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/fcn/requirements.txt",
    "content": "numpy==1.22.0\ntorch==1.13.1\ntorchvision==0.14.1\nPillow\n"
  },
  {
    "path": "pytorch_segmentation/fcn/results20210918-122740.txt",
    "content": "[epoch: 0] \ntrain_loss: 0.7720 \nlr: 0.000100 \nglobal correct: 93.4\naverage row correct: ['96.8', '90.7', '79.2', '82.7', '78.3', '59.4', '91.2', '82.9', '92.9', '57.8', '87.1', '66.5', '82.8', '78.7', '88.9', '95.1', '62.9', '86.6', '67.1', '89.4', '81.4']\nIoU: ['93.2', '85.9', '39.2', '79.5', '68.9', '55.6', '88.1', '72.8', '81.7', '41.1', '74.9', '54.0', '72.9', '74.6', '77.7', '87.1', '54.4', '75.1', '50.7', '82.9', '72.6']\nmean IoU: 70.6\n\n[epoch: 1] \ntrain_loss: 0.6589 \nlr: 0.000090 \nglobal correct: 93.4\naverage row correct: ['96.5', '89.4', '74.6', '84.5', '82.9', '68.8', '93.8', '84.7', '93.5', '56.9', '87.5', '68.0', '81.8', '78.2', '90.7', '94.8', '65.6', '87.1', '70.9', '89.1', '85.1']\nIoU: ['93.3', '85.5', '38.2', '79.9', '69.8', '62.7', '87.5', '75.7', '80.3', '40.7', '74.8', '54.6', '72.2', '74.1', '76.6', '87.6', '54.7', '72.9', '51.0', '82.6', '70.9']\nmean IoU: 70.7\n\n[epoch: 2] \ntrain_loss: 0.6238 \nlr: 0.000080 \nglobal correct: 93.5\naverage row correct: ['96.5', '93.2', '75.8', '85.3', '84.2', '70.6', '91.7', '85.7', '93.2', '58.8', '76.7', '68.3', '81.4', '83.2', '88.7', '95.1', '69.9', '88.6', '70.5', '91.8', '86.7']\nIoU: ['93.5', '86.1', '39.0', '81.2', '69.9', '63.9', '87.7', '76.4', '80.2', '41.5', '71.8', '56.2', '71.3', '74.4', '78.0', '87.3', '57.3', '70.9', '50.3', '82.8', '71.9']\nmean IoU: 71.0\n\n[epoch: 3] \ntrain_loss: 0.5854 \nlr: 0.000069 \nglobal correct: 93.5\naverage row correct: ['96.7', '91.5', '77.3', '83.9', '80.8', '74.0', '92.6', '86.7', '94.3', '65.0', '68.7', '67.8', '76.9', '88.2', '85.5', '94.5', '71.8', '87.9', '66.7', '89.4', '86.3']\nIoU: ['93.6', '87.0', '39.4', '80.6', '69.4', '66.5', '87.7', '76.9', '78.5', '41.5', '66.3', '55.8', '68.9', '70.8', '78.4', '88.2', '58.7', '71.0', '49.4', '83.3', '74.1']\nmean IoU: 70.8\n\n[epoch: 4] \ntrain_loss: 0.6140 \nlr: 0.000059 \nglobal correct: 93.6\naverage row correct: ['96.5', '92.4', '77.4', '85.1', '80.2', '80.6', '94.1', '87.0', 
'94.8', '62.8', '87.2', '70.0', '78.8', '77.5', '85.8', '94.7', '73.4', '83.9', '68.6', '88.0', '86.6']\nIoU: ['93.7', '87.1', '39.4', '80.7', '70.1', '70.2', '87.0', '77.3', '78.9', '41.3', '72.3', '56.7', '69.9', '72.2', '77.9', '87.8', '57.8', '72.6', '50.5', '82.1', '74.3']\nmean IoU: 71.4\n\n[epoch: 5] \ntrain_loss: 0.5653 \nlr: 0.000048 \nglobal correct: 93.7\naverage row correct: ['96.6', '87.7', '76.9', '84.3', '79.3', '81.6', '92.6', '88.4', '94.0', '61.9', '76.7', '71.0', '81.5', '88.2', '87.3', '94.5', '73.5', '84.9', '69.1', '91.4', '86.6']\nIoU: ['93.7', '85.6', '40.1', '80.7', '70.2', '70.5', '87.6', '77.4', '80.4', '42.0', '72.4', '57.0', '72.2', '73.4', '78.4', '88.1', '58.6', '74.3', '50.4', '82.7', '73.6']\nmean IoU: 71.9\n\n[epoch: 6] \ntrain_loss: 0.5500 \nlr: 0.000037 \nglobal correct: 93.1\naverage row correct: ['96.4', '91.6', '74.8', '78.4', '83.2', '81.9', '89.4', '88.7', '95.8', '59.4', '57.2', '70.0', '77.4', '75.2', '87.6', '95.2', '74.1', '82.4', '72.4', '91.9', '87.2']\nIoU: ['93.6', '87.5', '39.5', '76.1', '68.5', '71.0', '86.1', '78.6', '74.6', '41.4', '54.6', '57.2', '61.0', '67.0', '78.2', '87.9', '58.1', '67.5', '50.4', '82.0', '74.2']\nmean IoU: 69.3\n\n[epoch: 7] \ntrain_loss: 0.5553 \nlr: 0.000026 \nglobal correct: 93.3\naverage row correct: ['96.7', '88.4', '72.3', '82.2', '80.7', '81.6', '82.5', '89.7', '93.4', '59.0', '69.5', '70.1', '78.8', '86.4', '87.3', '94.9', '70.8', '89.6', '72.2', '85.4', '86.3']\nIoU: ['93.6', '85.5', '39.0', '79.2', '69.8', '70.8', '79.7', '76.8', '79.0', '41.8', '65.9', '57.1', '68.9', '71.0', '78.0', '87.9', '58.6', '66.9', '50.7', '78.2', '74.4']\nmean IoU: 70.1\n\n[epoch: 8] \ntrain_loss: 0.5601 \nlr: 0.000014 \nglobal correct: 93.4\naverage row correct: ['96.5', '91.0', '73.8', '81.4', '83.7', '83.4', '89.9', '88.8', '95.4', '61.3', '80.6', '70.0', '75.4', '84.3', '88.2', '94.9', '72.0', '83.7', '69.7', '83.3', '88.5']\nIoU: ['93.6', '87.2', '40.1', '78.6', '69.8', '71.3', '84.5', '77.6', 
'76.3', '41.0', '72.4', '56.8', '66.9', '73.2', '77.6', '87.8', '59.2', '72.5', '50.2', '78.7', '69.9']\nmean IoU: 70.7\n\n[epoch: 9] \ntrain_loss: 0.5550 \nlr: 0.000000 \nglobal correct: 93.1\naverage row correct: ['96.7', '93.8', '72.7', '73.0', '82.1', '80.4', '95.6', '86.7', '95.6', '61.8', '63.6', '69.0', '73.2', '65.1', '87.9', '94.5', '73.7', '86.5', '69.0', '88.4', '87.9']\nIoU: ['93.7', '87.2', '39.4', '71.7', '70.4', '70.9', '86.9', '78.5', '73.1', '41.8', '58.4', '56.3', '59.4', '61.9', '78.2', '88.4', '59.3', '63.6', '50.4', '82.6', '73.7']\nmean IoU: 68.8\n\n"
  },
  {
    "path": "pytorch_segmentation/fcn/src/__init__.py",
    "content": "from .fcn_model import fcn_resnet50, fcn_resnet101\n"
  },
  {
    "path": "pytorch_segmentation/fcn/src/backbone.py",
    "content": "import torch\nimport torch.nn as nn\n\n\ndef conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n                     padding=dilation, groups=groups, bias=False, dilation=dilation)\n\n\ndef conv1x1(in_planes, out_planes, stride=1):\n    \"\"\"1x1 convolution\"\"\"\n    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)\n\n\nclass Bottleneck(nn.Module):\n    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)\n    # while original implementation places the stride at the first 1x1 convolution(self.conv1)\n    # according to \"Deep residual learning for image recognition\"https://arxiv.org/abs/1512.03385.\n    # This variant is also known as ResNet V1.5 and improves accuracy according to\n    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.\n\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,\n                 base_width=64, dilation=1, norm_layer=None):\n        super(Bottleneck, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        width = int(planes * (base_width / 64.)) * groups\n        # Both self.conv2 and self.downsample layers downsample the input when stride != 1\n        self.conv1 = conv1x1(inplanes, width)\n        self.bn1 = norm_layer(width)\n        self.conv2 = conv3x3(width, width, stride, groups, dilation)\n        self.bn2 = norm_layer(width)\n        self.conv3 = conv1x1(width, planes * self.expansion)\n        self.bn3 = norm_layer(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        identity = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = 
self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n\n    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,\n                 groups=1, width_per_group=64, replace_stride_with_dilation=None,\n                 norm_layer=None):\n        super(ResNet, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        self._norm_layer = norm_layer\n\n        self.inplanes = 64\n        self.dilation = 1\n        if replace_stride_with_dilation is None:\n            # each element in the tuple indicates if we should replace\n            # the 2x2 stride with a dilated convolution instead\n            replace_stride_with_dilation = [False, False, False]\n        if len(replace_stride_with_dilation) != 3:\n            raise ValueError(\"replace_stride_with_dilation should be None \"\n                             \"or a 3-element tuple, got {}\".format(replace_stride_with_dilation))\n        self.groups = groups\n        self.base_width = width_per_group\n        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,\n                               bias=False)\n        self.bn1 = norm_layer(self.inplanes)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,\n                                       dilate=replace_stride_with_dilation[0])\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,\n                                       dilate=replace_stride_with_dilation[1])\n    
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,\n                                       dilate=replace_stride_with_dilation[2])\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.fc = nn.Linear(512 * block.expansion, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n        # Zero-initialize the last BN in each residual branch,\n        # so that the residual branch starts with zeros, and each residual block behaves like an identity.\n        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677\n        if zero_init_residual:\n            for m in self.modules():\n                if isinstance(m, Bottleneck):\n                    nn.init.constant_(m.bn3.weight, 0)\n\n    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):\n        norm_layer = self._norm_layer\n        downsample = None\n        previous_dilation = self.dilation\n        if dilate:\n            self.dilation *= stride\n            stride = 1\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                conv1x1(self.inplanes, planes * block.expansion, stride),\n                norm_layer(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,\n                            self.base_width, previous_dilation, norm_layer))\n        self.inplanes = planes * block.expansion\n        for _ in range(1, blocks):\n            layers.append(block(self.inplanes, planes, groups=self.groups,\n                                base_width=self.base_width, dilation=self.dilation,\n                            
    norm_layer=norm_layer))\n\n        return nn.Sequential(*layers)\n\n    def _forward_impl(self, x):\n        # See note [TorchScript super()]\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.fc(x)\n\n        return x\n\n    def forward(self, x):\n        return self._forward_impl(x)\n\n\ndef _resnet(block, layers, **kwargs):\n    model = ResNet(block, layers, **kwargs)\n    return model\n\n\ndef resnet50(**kwargs):\n    r\"\"\"ResNet-50 model from\n    `\"Deep Residual Learning for Image Recognition\" <https://arxiv.org/pdf/1512.03385.pdf>`_\n\n    Args:\n        pretrained (bool): If True, returns a model pre-trained on ImageNet\n        progress (bool): If True, displays a progress bar of the download to stderr\n    \"\"\"\n    return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)\n\n\ndef resnet101(**kwargs):\n    r\"\"\"ResNet-101 model from\n    `\"Deep Residual Learning for Image Recognition\" <https://arxiv.org/pdf/1512.03385.pdf>`_\n\n    Args:\n        pretrained (bool): If True, returns a model pre-trained on ImageNet\n        progress (bool): If True, displays a progress bar of the download to stderr\n    \"\"\"\n    return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/src/fcn_model.py",
    "content": "from collections import OrderedDict\n\nfrom typing import Dict\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom .backbone import resnet50, resnet101\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n\n    Args:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    _version = 2\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n\n        # 重新构建backbone，将没有使用到的模块全部删掉\n        layers = OrderedDict()\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super(IntermediateLayerGetter, self).__init__(layers)\n     
   self.return_layers = orig_return_layers\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        out = OrderedDict()\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass FCN(nn.Module):\n    \"\"\"\n    Implements a Fully-Convolutional Network for semantic segmentation.\n\n    Args:\n        backbone (nn.Module): the network used to compute the features for the model.\n            The backbone should return an OrderedDict[Tensor], with the key being\n            \"out\" for the last feature map used, and \"aux\" if an auxiliary classifier\n            is used.\n        classifier (nn.Module): module that takes the \"out\" element returned from\n            the backbone and returns a dense prediction.\n        aux_classifier (nn.Module, optional): auxiliary classifier used during training\n    \"\"\"\n    __constants__ = ['aux_classifier']\n\n    def __init__(self, backbone, classifier, aux_classifier=None):\n        super(FCN, self).__init__()\n        self.backbone = backbone\n        self.classifier = classifier\n        self.aux_classifier = aux_classifier\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        input_shape = x.shape[-2:]\n        # contract: features is a dict of tensors\n        features = self.backbone(x)\n\n        result = OrderedDict()\n        x = features[\"out\"]\n        x = self.classifier(x)\n        # 原论文中虽然使用的是ConvTranspose2d，但权重是冻结的，所以就是一个bilinear插值\n        x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)\n        result[\"out\"] = x\n\n        if self.aux_classifier is not None:\n            x = features[\"aux\"]\n            x = self.aux_classifier(x)\n            # 原论文中虽然使用的是ConvTranspose2d，但权重是冻结的，所以就是一个bilinear插值\n            x = F.interpolate(x, size=input_shape, mode='bilinear', 
align_corners=False)\n            result[\"aux\"] = x\n\n        return result\n\n\nclass FCNHead(nn.Sequential):\n    def __init__(self, in_channels, channels):\n        inter_channels = in_channels // 4\n        layers = [\n            nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),\n            nn.BatchNorm2d(inter_channels),\n            nn.ReLU(),\n            nn.Dropout(0.1),\n            nn.Conv2d(inter_channels, channels, 1)\n        ]\n\n        super(FCNHead, self).__init__(*layers)\n\n\ndef fcn_resnet50(aux, num_classes=21, pretrain_backbone=False):\n    # 'resnet50_imagenet': 'https://download.pytorch.org/models/resnet50-0676ba61.pth'\n    # 'fcn_resnet50_coco': 'https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth'\n    backbone = resnet50(replace_stride_with_dilation=[False, True, True])\n\n    if pretrain_backbone:\n        # 载入resnet50 backbone预训练权重\n        backbone.load_state_dict(torch.load(\"resnet50.pth\", map_location='cpu'))\n\n    out_inplanes = 2048\n    aux_inplanes = 1024\n\n    return_layers = {'layer4': 'out'}\n    if aux:\n        return_layers['layer3'] = 'aux'\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    aux_classifier = None\n    # why using aux: https://github.com/pytorch/vision/issues/4292\n    if aux:\n        aux_classifier = FCNHead(aux_inplanes, num_classes)\n\n    classifier = FCNHead(out_inplanes, num_classes)\n\n    model = FCN(backbone, classifier, aux_classifier)\n\n    return model\n\n\ndef fcn_resnet101(aux, num_classes=21, pretrain_backbone=False):\n    # 'resnet101_imagenet': 'https://download.pytorch.org/models/resnet101-63fe2227.pth'\n    # 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth'\n    backbone = resnet101(replace_stride_with_dilation=[False, True, True])\n\n    if pretrain_backbone:\n        # 载入resnet101 backbone预训练权重\n        backbone.load_state_dict(torch.load(\"resnet101.pth\", 
map_location='cpu'))\n\n    out_inplanes = 2048\n    aux_inplanes = 1024\n\n    return_layers = {'layer4': 'out'}\n    if aux:\n        return_layers['layer3'] = 'aux'\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    aux_classifier = None\n    # why using aux: https://github.com/pytorch/vision/issues/4292\n    if aux:\n        aux_classifier = FCNHead(aux_inplanes, num_classes)\n\n    classifier = FCNHead(out_inplanes, num_classes)\n\n    model = FCN(backbone, classifier, aux_classifier)\n\n    return model\n"
  },
  {
    "path": "pytorch_segmentation/fcn/train.py",
    "content": "import os\nimport time\nimport datetime\n\nimport torch\n\nfrom src import fcn_resnet50\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(aux, num_classes, pretrain=True):\n    model = fcn_resnet50(aux=aux, num_classes=num_classes)\n\n    if pretrain:\n        weights_dict = torch.load(\"./fcn_resnet50_coco.pth\", map_location='cpu')\n\n        if num_classes != 21:\n            # 官方提供的预训练权重是21类(包括背景)\n            # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n            for k in list(weights_dict.keys()):\n                if \"classifier.4\" in k:\n                    del weights_dict[k]\n\n        missing_keys, unexpected_keys = 
model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n            print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    batch_size = args.batch_size\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               num_workers=num_workers,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                   
          collate_fn=val_dataset.collate_fn)\n\n    model = create_model(aux=args.aux, num_classes=num_classes)\n    model.to(device)\n\n    params_to_optimize = [\n        {\"params\": [p for p in model.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model.classifier.parameters() if p.requires_grad]}\n    ]\n\n    if args.aux:\n        params = [p for p in model.aux_classifier.parameters() if p.requires_grad]\n        params_to_optimize.append({\"params\": params, \"lr\": args.lr * 10})\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay\n    )\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)\n\n    if args.resume:\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 记录每个epoch对应的train_loss、lr以及验证集各指标\n            train_info = f\"[epoch: {epoch}]\\n\" \\\n                         f\"train_loss: {mean_loss:.4f}\\n\" \\\n                         f\"lr: 
{lr:.6f}\\n\"\n            f.write(train_info + val_info + \"\\n\\n\")\n\n        save_file = {\"model\": model.state_dict(),\n                     \"optimizer\": optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     \"epoch\": epoch,\n                     \"args\": args}\n        if args.amp:\n            save_file[\"scaler\"] = scaler.state_dict()\n        torch.save(save_file, \"save_weights/model_{}.pth\".format(epoch))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print(\"training time {}\".format(total_time_str))\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch fcn training\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxilier loss\")\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument(\"-b\", \"--batch-size\", default=4, type=int)\n    parser.add_argument(\"--epochs\", default=30, type=int, metavar=\"N\",\n                        help=\"number of total epochs to train\")\n\n    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n                        
help='start epoch')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nfrom src import fcn_resnet50\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(aux, num_classes):\n    model = fcn_resnet50(aux=aux, num_classes=num_classes)\n    weights_dict = torch.load(\"./fcn_resnet50_coco.pth\", map_location='cpu')\n\n    if num_classes != 21:\n        # 官方提供的预训练权重是21类(包括背景)\n        # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n        for k in list(weights_dict.keys()):\n            if \"classifier.4\" in k:\n                del weights_dict[k]\n\n    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, 
strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    train_data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_size=args.batch_size,\n        sampler=train_sampler, num_workers=args.workers,\n        
collate_fn=train_dataset.collate_fn, drop_last=True)\n\n    val_data_loader = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + 20 classes\n    model = create_model(aux=args.aux, num_classes=num_classes)\n    model.to(device)\n\n    if args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params_to_optimize = [\n        {\"params\": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},\n    ]\n    if args.aux:\n        params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad]\n        params_to_optimize.append({\"params\": params, \"lr\": args.lr * 10})\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        
optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        return\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                train_info = f\"[epoch: {epoch}]\\n\" \\\n                             f\"train_loss: {mean_loss:.4f}\\n\" \\\n                             f\"lr: {lr:.6f}\\n\"\n                f.write(train_info + val_info + \"\\n\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_file = {'model': model_without_ddp.state_dict(),\n                         'optimizer': optimizer.state_dict(),\n                         'lr_scheduler': lr_scheduler.state_dict(),\n                         'args': args,\n                         'epoch': epoch}\n            if args.amp:\n                save_file[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_file,\n                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))\n\n    total_time = 
time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data/', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxilier loss\")\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 是否使用同步BN(在多个GPU之间同步)，默认不开启，开启后训练速度会变慢\n    parser.add_argument('--sync_bn', type=bool, default=False, help='whether using SyncBatchNorm')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 训练学习率，这里默认设置成0.0001，如果效果不好可以尝试加大学习率\n    parser.add_argument('--lr', default=0.0001, type=float,\n                        help='initial learning rate')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练过程打印信息的频率\n    
parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 分布式进程数\n    parser.add_argument('--world-size', default=1, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/train_utils/__init__.py",
    "content": "from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\n"
  },
  {
    "path": "pytorch_segmentation/fcn/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport time\nimport torch\nimport torch.distributed as dist\n\nimport errno\nimport os\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\nclass ConfusionMatrix(object):\n    def __init__(self, num_classes):\n        self.num_classes = num_classes\n        self.mat = None\n\n    def update(self, a, 
b):\n        n = self.num_classes\n        if self.mat is None:\n            # 创建混淆矩阵\n            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)\n        with torch.no_grad():\n            # 寻找GT中为目标的像素索引\n            k = (a >= 0) & (a < n)\n            # 统计像素真实类别a[k]被预测成类别b[k]的个数(这里的做法很巧妙)\n            inds = n * a[k].to(torch.int64) + b[k]\n            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)\n\n    def reset(self):\n        if self.mat is not None:\n            self.mat.zero_()\n\n    def compute(self):\n        h = self.mat.float()\n        # 计算全局预测准确率(混淆矩阵的对角线为预测正确的个数)\n        acc_global = torch.diag(h).sum() / h.sum()\n        # 计算每个类别的准确率\n        acc = torch.diag(h) / h.sum(1)\n        # 计算每个类别预测与真实目标的iou\n        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))\n        return acc_global, acc, iu\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.mat)\n\n    def __str__(self):\n        acc_global, acc, iu = self.compute()\n        return (\n            'global correct: {:.1f}\\n'\n            'average row correct: {}\\n'\n            'IoU: {}\\n'\n            'mean IoU: {:.1f}').format(\n                acc_global.item() * 100,\n                ['{:.1f}'.format(i) for i in (acc * 100).tolist()],\n                ['{:.1f}'.format(i) for i in (iu * 100).tolist()],\n                iu.mean().item() * 100)\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            
self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = ''\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}',\n                'max mem: {memory:.0f}'\n            ])\n        else:\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}'\n            ])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0:\n                eta_seconds = 
iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))\n                if torch.cuda.is_available():\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time),\n                        memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {}'.format(header, total_time_str))\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n 
       torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    elif hasattr(args, \"rank\"):\n        pass\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/train_utils/train_and_eval.py",
    "content": "import torch\nfrom torch import nn\nimport train_utils.distributed_utils as utils\n\n\ndef criterion(inputs, target):\n    losses = {}\n    for name, x in inputs.items():\n        # 忽略target中值为255的像素，255的像素是目标边缘或者padding填充\n        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)\n\n    if len(losses) == 1:\n        return losses['out']\n\n    return losses['out'] + 0.5 * losses['aux']\n\n\ndef evaluate(model, data_loader, device, num_classes):\n    model.eval()\n    confmat = utils.ConfusionMatrix(num_classes)\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = 'Test:'\n    with torch.no_grad():\n        for image, target in metric_logger.log_every(data_loader, 100, header):\n            image, target = image.to(device), target.to(device)\n            output = model(image)\n            output = output['out']\n\n            confmat.update(target.flatten(), output.argmax(1).flatten())\n\n        confmat.reduce_from_all_processes()\n\n    return confmat\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    for image, target in metric_logger.log_every(data_loader, print_freq, header):\n        image, target = image.to(device), target.to(device)\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            output = model(image)\n            loss = criterion(output, target)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(loss).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            loss.backward()\n            optimizer.step()\n\n        lr_scheduler.step()\n\n        lr = optimizer.param_groups[0][\"lr\"]\n        
metric_logger.update(loss=loss.item(), lr=lr)\n\n    return metric_logger.meters[\"loss\"].global_avg, lr\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            # warmup后lr倍率因子从1 -> 0\n            # 参考deeplab_v2: Learning rate policy\n            return (1 - (x - warmup_epochs * num_step) / ((epochs - warmup_epochs) * num_step)) ** 0.9\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n"
  },
  {
    "path": "pytorch_segmentation/fcn/transforms.py",
    "content": "import numpy as np\nimport random\n\nimport torch\nfrom torchvision import transforms as T\nfrom torchvision.transforms import functional as F\n\n\ndef pad_if_smaller(img, size, fill=0):\n    # 如果图像最小边长小于给定size，则用数值fill进行padding\n    min_size = min(img.size)\n    if min_size < size:\n        ow, oh = img.size\n        padh = size - oh if oh < size else 0\n        padw = size - ow if ow < size else 0\n        img = F.pad(img, (0, 0, padw, padh), fill=fill)\n    return img\n\n\nclass Compose(object):\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass RandomResize(object):\n    def __init__(self, min_size, max_size=None):\n        self.min_size = min_size\n        if max_size is None:\n            max_size = min_size\n        self.max_size = max_size\n\n    def __call__(self, image, target):\n        size = random.randint(self.min_size, self.max_size)\n        # 这里size传入的是int类型，所以是将图像的最小边长缩放到size大小\n        image = F.resize(image, size)\n        # 这里的interpolation注意下，在torchvision(0.9.0)以后才有InterpolationMode.NEAREST\n        # 如果是之前的版本需要使用PIL.Image.NEAREST\n        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    def __init__(self, flip_prob):\n        self.flip_prob = flip_prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.hflip(image)\n            target = F.hflip(target)\n        return image, target\n\n\nclass RandomCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = pad_if_smaller(image, self.size)\n        target = pad_if_smaller(target, self.size, fill=255)\n        crop_params = T.RandomCrop.get_params(image, (self.size, 
self.size))\n        image = F.crop(image, *crop_params)\n        target = F.crop(target, *crop_params)\n        return image, target\n\n\nclass CenterCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = F.center_crop(image, self.size)\n        target = F.center_crop(target, self.size)\n        return image, target\n\n\nclass ToTensor(object):\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        target = torch.as_tensor(np.array(target), dtype=torch.int64)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean, std):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n"
  },
  {
    "path": "pytorch_segmentation/fcn/validation.py",
    "content": "import os\nimport torch\n\nfrom src import fcn_resnet50\nfrom train_utils import evaluate\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    assert os.path.exists(args.weights), f\"weights {args.weights} not found.\"\n\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=SegmentationPresetEval(520),\n                                  txt_name=\"val.txt\")\n\n    num_workers = 8\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = fcn_resnet50(aux=args.aux, num_classes=num_classes)\n    model.load_state_dict(torch.load(args.weights, map_location=device)['model'])\n    model.to(device)\n\n    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n    print(confmat)\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch fcn training\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    
parser.add_argument(\"--weights\", default=\"./save_weights/model_29.pth\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--aux\", default=True, type=bool, help=\"auxiliary loss\")\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/README.md",
    "content": "# LRASPP(Searching for MobileNetV3)\n\n## 该项目主要是来自pytorch官方torchvision模块中的源码\n* https://github.com/pytorch/vision/tree/main/torchvision/models/segmentation\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10\n* Ubuntu或Centos(Windows暂不支持多GPU训练)\n* 最好使用GPU训练\n* 详细环境配置见```requirements.txt```\n\n## 文件结构：\n```\n  ├── src: 模型的backbone以及LRASPP的搭建\n  ├── train_utils: 训练、验证以及多GPU训练相关模块\n  ├── my_dataset.py: 自定义dataset用于读取VOC数据集\n  ├── train.py: 单GPU训练脚本\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  ├── validation.py: 利用训练好的权重验证/测试数据的mIoU等指标，并生成record_mAP.txt文件\n  └── pascal_voc_classes.json: pascal_voc标签文件\n```\n\n## 预训练权重下载地址：\n* 注意：官方提供的预训练权重是在COCO上预训练得到的，训练时只针对和PASCAL VOC相同的类别进行了训练，所以类别数是21(包括背景)\n* lraspp_mobilenet_v3_large: https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth\n* 注意，下载的预训练权重记得要重命名，比如在train.py中读取的是```lraspp_mobilenet_v3_large.pth```文件，\n  不是```lraspp_mobilenet_v3_large-d234d4ea.pth```\n \n \n## 数据集，本例程使用的是PASCAL VOC2012数据集\n* Pascal VOC2012 train/val数据集下载地址：http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar\n* 如果不了解数据集或者想使用自己的数据集进行训练，请参考我的博文: https://blog.csdn.net/qq_37541097/article/details/115787033\n\n## 训练方法\n* 确保提前准备好数据集\n* 确保提前下载好对应预训练模型权重\n* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用```torchrun --nproc_per_node=8 train_multi_GPU.py```指令,```nproc_per_node```参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上```CUDA_VISIBLE_DEVICES=0,3```(例如我只要使用设备中的第1块和第4块GPU设备)\n* ```CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py```\n\n## 注意事项\n* 在使用训练脚本时，注意要将'--data-path'(VOC_root)设置为自己存放'VOCdevkit'文件夹所在的**根目录**\n* 在使用预测脚本时，要将'weights_path'设置为你自己生成的权重路径。\n* 使用validation文件时，注意确保你的验证集或者测试集中必须包含每个类别的目标，并且使用时只需要修改'--num-classes'、'--data-path'和'--weights'即可，其他代码尽量不要改动\n\n## 如果对LRASPP原理不是很理解可参考我的bilibili\nLR-ASPP网络讲解: [https://www.bilibili.com/video/BV1LS4y1M76E](https://www.bilibili.com/video/BV1LS4y1M76E)\n\n## 
进一步了解该项目，以及对LRASPP代码的分析可参考我的bilibili\nLR-ASPP源码解析(Pytorch版): [https://www.bilibili.com/video/bv13D4y1F7ML](https://www.bilibili.com/video/bv13D4y1F7ML)\n\n## Pytorch官方实现的LRASPP网络框架图\n![lraspp](lraspp.png)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/get_palette.py",
    "content": "import json\nimport numpy as np\nfrom PIL import Image\n\n# 读取mask标签\ntarget = Image.open(\"./2007_001288.png\")\n# 获取调色板\npalette = target.getpalette()\npalette = np.reshape(palette, (-1, 3)).tolist()\n# 转换成字典形式\npd = dict((i, color) for i, color in enumerate(palette))\n\njson_str = json.dumps(pd)\nwith open(\"palette.json\", \"w\") as f:\n    f.write(json_str)\n\n# target = np.array(target)\n# print(target)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/my_dataset.py",
    "content": "import os\n\nimport torch.utils.data as data\nfrom PIL import Image\n\n\nclass VOCSegmentation(data.Dataset):\n    def __init__(self, voc_root, year=\"2012\", transforms=None, txt_name: str = \"train.txt\"):\n        super(VOCSegmentation, self).__init__()\n        assert year in [\"2007\", \"2012\"], \"year must be in ['2007', '2012']\"\n        root = os.path.join(voc_root, \"VOCdevkit\", f\"VOC{year}\")\n        assert os.path.exists(root), \"path '{}' does not exist.\".format(root)\n        image_dir = os.path.join(root, 'JPEGImages')\n        mask_dir = os.path.join(root, 'SegmentationClass')\n\n        txt_path = os.path.join(root, \"ImageSets\", \"Segmentation\", txt_name)\n        assert os.path.exists(txt_path), \"file '{}' does not exist.\".format(txt_path)\n        with open(os.path.join(txt_path), \"r\") as f:\n            file_names = [x.strip() for x in f.readlines() if len(x.strip()) > 0]\n\n        self.images = [os.path.join(image_dir, x + \".jpg\") for x in file_names]\n        self.masks = [os.path.join(mask_dir, x + \".png\") for x in file_names]\n        assert (len(self.images) == len(self.masks))\n        self.transforms = transforms\n\n    def __getitem__(self, index):\n        \"\"\"\n        Args:\n            index (int): Index\n\n        Returns:\n            tuple: (image, target) where target is the image segmentation.\n        \"\"\"\n        img = Image.open(self.images[index]).convert('RGB')\n        target = Image.open(self.masks[index])\n\n        if self.transforms is not None:\n            img, target = self.transforms(img, target)\n\n        return img, target\n\n    def __len__(self):\n        return len(self.images)\n\n    @staticmethod\n    def collate_fn(batch):\n        images, targets = list(zip(*batch))\n        batched_imgs = cat_list(images, fill_value=0)\n        batched_targets = cat_list(targets, fill_value=255)\n        return batched_imgs, batched_targets\n\n\ndef cat_list(images, fill_value=0):\n  
  max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))\n    batch_shape = (len(images),) + max_size\n    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)\n    for img, pad_img in zip(images, batched_imgs):\n        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)\n    return batched_imgs\n\n\n# dataset = VOCSegmentation(voc_root=\"/data/\", transforms=get_transform(train=True))\n# d1 = dataset[0]\n# print(d1)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/palette.json",
    "content": "{\"0\": [0, 0, 0], \"1\": [128, 0, 0], \"2\": [0, 128, 0], \"3\": [128, 128, 0], \"4\": [0, 0, 128], \"5\": [128, 0, 128], \"6\": [0, 128, 128], \"7\": [128, 128, 128], \"8\": [64, 0, 0], \"9\": [192, 0, 0], \"10\": [64, 128, 0], \"11\": [192, 128, 0], \"12\": [64, 0, 128], \"13\": [192, 0, 128], \"14\": [64, 128, 128], \"15\": [192, 128, 128], \"16\": [0, 64, 0], \"17\": [128, 64, 0], \"18\": [0, 192, 0], \"19\": [128, 192, 0], \"20\": [0, 64, 128], \"21\": [128, 64, 128], \"22\": [0, 192, 128], \"23\": [128, 192, 128], \"24\": [64, 64, 0], \"25\": [192, 64, 0], \"26\": [64, 192, 0], \"27\": [192, 192, 0], \"28\": [64, 64, 128], \"29\": [192, 64, 128], \"30\": [64, 192, 128], \"31\": [192, 192, 128], \"32\": [0, 0, 64], \"33\": [128, 0, 64], \"34\": [0, 128, 64], \"35\": [128, 128, 64], \"36\": [0, 0, 192], \"37\": [128, 0, 192], \"38\": [0, 128, 192], \"39\": [128, 128, 192], \"40\": [64, 0, 64], \"41\": [192, 0, 64], \"42\": [64, 128, 64], \"43\": [192, 128, 64], \"44\": [64, 0, 192], \"45\": [192, 0, 192], \"46\": [64, 128, 192], \"47\": [192, 128, 192], \"48\": [0, 64, 64], \"49\": [128, 64, 64], \"50\": [0, 192, 64], \"51\": [128, 192, 64], \"52\": [0, 64, 192], \"53\": [128, 64, 192], \"54\": [0, 192, 192], \"55\": [128, 192, 192], \"56\": [64, 64, 64], \"57\": [192, 64, 64], \"58\": [64, 192, 64], \"59\": [192, 192, 64], \"60\": [64, 64, 192], \"61\": [192, 64, 192], \"62\": [64, 192, 192], \"63\": [192, 192, 192], \"64\": [32, 0, 0], \"65\": [160, 0, 0], \"66\": [32, 128, 0], \"67\": [160, 128, 0], \"68\": [32, 0, 128], \"69\": [160, 0, 128], \"70\": [32, 128, 128], \"71\": [160, 128, 128], \"72\": [96, 0, 0], \"73\": [224, 0, 0], \"74\": [96, 128, 0], \"75\": [224, 128, 0], \"76\": [96, 0, 128], \"77\": [224, 0, 128], \"78\": [96, 128, 128], \"79\": [224, 128, 128], \"80\": [32, 64, 0], \"81\": [160, 64, 0], \"82\": [32, 192, 0], \"83\": [160, 192, 0], \"84\": [32, 64, 128], \"85\": [160, 64, 128], \"86\": [32, 192, 128], \"87\": [160, 
192, 128], \"88\": [96, 64, 0], \"89\": [224, 64, 0], \"90\": [96, 192, 0], \"91\": [224, 192, 0], \"92\": [96, 64, 128], \"93\": [224, 64, 128], \"94\": [96, 192, 128], \"95\": [224, 192, 128], \"96\": [32, 0, 64], \"97\": [160, 0, 64], \"98\": [32, 128, 64], \"99\": [160, 128, 64], \"100\": [32, 0, 192], \"101\": [160, 0, 192], \"102\": [32, 128, 192], \"103\": [160, 128, 192], \"104\": [96, 0, 64], \"105\": [224, 0, 64], \"106\": [96, 128, 64], \"107\": [224, 128, 64], \"108\": [96, 0, 192], \"109\": [224, 0, 192], \"110\": [96, 128, 192], \"111\": [224, 128, 192], \"112\": [32, 64, 64], \"113\": [160, 64, 64], \"114\": [32, 192, 64], \"115\": [160, 192, 64], \"116\": [32, 64, 192], \"117\": [160, 64, 192], \"118\": [32, 192, 192], \"119\": [160, 192, 192], \"120\": [96, 64, 64], \"121\": [224, 64, 64], \"122\": [96, 192, 64], \"123\": [224, 192, 64], \"124\": [96, 64, 192], \"125\": [224, 64, 192], \"126\": [96, 192, 192], \"127\": [224, 192, 192], \"128\": [0, 32, 0], \"129\": [128, 32, 0], \"130\": [0, 160, 0], \"131\": [128, 160, 0], \"132\": [0, 32, 128], \"133\": [128, 32, 128], \"134\": [0, 160, 128], \"135\": [128, 160, 128], \"136\": [64, 32, 0], \"137\": [192, 32, 0], \"138\": [64, 160, 0], \"139\": [192, 160, 0], \"140\": [64, 32, 128], \"141\": [192, 32, 128], \"142\": [64, 160, 128], \"143\": [192, 160, 128], \"144\": [0, 96, 0], \"145\": [128, 96, 0], \"146\": [0, 224, 0], \"147\": [128, 224, 0], \"148\": [0, 96, 128], \"149\": [128, 96, 128], \"150\": [0, 224, 128], \"151\": [128, 224, 128], \"152\": [64, 96, 0], \"153\": [192, 96, 0], \"154\": [64, 224, 0], \"155\": [192, 224, 0], \"156\": [64, 96, 128], \"157\": [192, 96, 128], \"158\": [64, 224, 128], \"159\": [192, 224, 128], \"160\": [0, 32, 64], \"161\": [128, 32, 64], \"162\": [0, 160, 64], \"163\": [128, 160, 64], \"164\": [0, 32, 192], \"165\": [128, 32, 192], \"166\": [0, 160, 192], \"167\": [128, 160, 192], \"168\": [64, 32, 64], \"169\": [192, 32, 64], \"170\": [64, 160, 64], \"171\": 
[192, 160, 64], \"172\": [64, 32, 192], \"173\": [192, 32, 192], \"174\": [64, 160, 192], \"175\": [192, 160, 192], \"176\": [0, 96, 64], \"177\": [128, 96, 64], \"178\": [0, 224, 64], \"179\": [128, 224, 64], \"180\": [0, 96, 192], \"181\": [128, 96, 192], \"182\": [0, 224, 192], \"183\": [128, 224, 192], \"184\": [64, 96, 64], \"185\": [192, 96, 64], \"186\": [64, 224, 64], \"187\": [192, 224, 64], \"188\": [64, 96, 192], \"189\": [192, 96, 192], \"190\": [64, 224, 192], \"191\": [192, 224, 192], \"192\": [32, 32, 0], \"193\": [160, 32, 0], \"194\": [32, 160, 0], \"195\": [160, 160, 0], \"196\": [32, 32, 128], \"197\": [160, 32, 128], \"198\": [32, 160, 128], \"199\": [160, 160, 128], \"200\": [96, 32, 0], \"201\": [224, 32, 0], \"202\": [96, 160, 0], \"203\": [224, 160, 0], \"204\": [96, 32, 128], \"205\": [224, 32, 128], \"206\": [96, 160, 128], \"207\": [224, 160, 128], \"208\": [32, 96, 0], \"209\": [160, 96, 0], \"210\": [32, 224, 0], \"211\": [160, 224, 0], \"212\": [32, 96, 128], \"213\": [160, 96, 128], \"214\": [32, 224, 128], \"215\": [160, 224, 128], \"216\": [96, 96, 0], \"217\": [224, 96, 0], \"218\": [96, 224, 0], \"219\": [224, 224, 0], \"220\": [96, 96, 128], \"221\": [224, 96, 128], \"222\": [96, 224, 128], \"223\": [224, 224, 128], \"224\": [32, 32, 64], \"225\": [160, 32, 64], \"226\": [32, 160, 64], \"227\": [160, 160, 64], \"228\": [32, 32, 192], \"229\": [160, 32, 192], \"230\": [32, 160, 192], \"231\": [160, 160, 192], \"232\": [96, 32, 64], \"233\": [224, 32, 64], \"234\": [96, 160, 64], \"235\": [224, 160, 64], \"236\": [96, 32, 192], \"237\": [224, 32, 192], \"238\": [96, 160, 192], \"239\": [224, 160, 192], \"240\": [32, 96, 64], \"241\": [160, 96, 64], \"242\": [32, 224, 64], \"243\": [160, 224, 64], \"244\": [32, 96, 192], \"245\": [160, 96, 192], \"246\": [32, 224, 192], \"247\": [160, 224, 192], \"248\": [96, 96, 64], \"249\": [224, 96, 64], \"250\": [96, 224, 64], \"251\": [224, 224, 64], \"252\": [96, 96, 192], \"253\": [224, 96, 
192], \"254\": [96, 224, 192], \"255\": [224, 224, 192]}"
  },
  {
    "path": "pytorch_segmentation/lraspp/pascal_voc_classes.json",
    "content": "{\n    \"aeroplane\": 1,\n    \"bicycle\": 2,\n    \"bird\": 3,\n    \"boat\": 4,\n    \"bottle\": 5,\n    \"bus\": 6,\n    \"car\": 7,\n    \"cat\": 8,\n    \"chair\": 9,\n    \"cow\": 10,\n    \"diningtable\": 11,\n    \"dog\": 12,\n    \"horse\": 13,\n    \"motorbike\": 14,\n    \"person\": 15,\n    \"pottedplant\": 16,\n    \"sheep\": 17,\n    \"sofa\": 18,\n    \"train\": 19,\n    \"tvmonitor\": 20\n}"
  },
  {
    "path": "pytorch_segmentation/lraspp/predict.py",
    "content": "import os\nimport time\nimport json\n\nimport torch\nfrom torchvision import transforms\nimport numpy as np\nfrom PIL import Image\n\nfrom src import lraspp_mobilenetv3_large\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    classes = 20\n    weights_path = \"./save_weights/model_29.pth\"\n    img_path = \"./test.jpg\"\n    palette_path = \"./palette.json\"\n    assert os.path.exists(weights_path), f\"weights {weights_path} not found.\"\n    assert os.path.exists(img_path), f\"image {img_path} not found.\"\n    assert os.path.exists(palette_path), f\"palette {palette_path} not found.\"\n    with open(palette_path, \"rb\") as f:\n        pallette_dict = json.load(f)\n        pallette = []\n        for v in pallette_dict.values():\n            pallette += v\n\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = lraspp_mobilenetv3_large(num_classes=classes+1)\n\n    # load weights\n    weights_dict = torch.load(weights_path, map_location='cpu')['model']\n    model.load_state_dict(weights_dict)\n    model.to(device)\n\n    # load image\n    original_img = Image.open(img_path)\n\n    # from pil image to tensor and normalize\n    data_transform = transforms.Compose([transforms.Resize(520),\n                                         transforms.ToTensor(),\n                                         transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                                                              std=(0.229, 0.224, 0.225))])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init model\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), 
device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        output = model(img.to(device))\n        t_end = time_synchronized()\n        print(\"inference time: {}\".format(t_end - t_start))\n\n        prediction = output['out'].argmax(1).squeeze(0)\n        prediction = prediction.to(\"cpu\").numpy().astype(np.uint8)\n        mask = Image.fromarray(prediction)\n        mask.putpalette(pallette)\n        mask.save(\"test_result.png\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/requirements.txt",
    "content": "numpy==1.22.0\ntorch==1.10.0\ntorchvision==0.11.1\nPillow\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/results20211028-105233.txt",
    "content": "[epoch: 0]\ntrain_loss: 0.5343\nlr: 0.000100\nglobal correct: 93.1\naverage row correct: ['96.8', '90.0', '73.9', '87.9', '79.4', '66.2', '92.1', '79.5', '90.9', '45.0', '88.9', '54.7', '85.8', '89.8', '87.5', '91.2', '66.8', '85.0', '68.4', '87.6', '71.6']\nIoU: ['92.4', '85.7', '34.8', '84.3', '66.4', '59.9', '89.2', '71.2', '86.0', '34.6', '82.3', '46.1', '78.6', '82.1', '79.8', '82.5', '54.8', '79.4', '50.2', '83.8', '65.5']\nmean IoU: 70.9\n\n[epoch: 1]\ntrain_loss: 0.4683\nlr: 0.000077\nglobal correct: 93.2\naverage row correct: ['96.2', '92.6', '75.2', '92.3', '82.6', '70.9', '93.5', '83.9', '93.5', '47.9', '91.0', '61.9', '87.0', '90.5', '89.8', '90.0', '68.1', '86.4', '70.4', '90.4', '75.5']\nIoU: ['92.5', '86.1', '34.9', '85.1', '65.3', '63.0', '90.0', '73.1', '86.0', '34.8', '83.2', '50.0', '77.6', '81.2', '79.8', '82.3', '54.3', '78.4', '49.8', '85.5', '67.3']\nmean IoU: 71.4\n\n[epoch: 2]\ntrain_loss: 0.4053\nlr: 0.000054\nglobal correct: 93.1\naverage row correct: ['95.9', '93.1', '75.9', '92.6', '83.8', '75.3', '94.4', '85.6', '93.7', '50.2', '91.2', '62.1', '87.1', '90.8', '90.3', '89.8', '71.2', '86.8', '71.8', '91.1', '77.5']\nIoU: ['92.5', '86.0', '35.1', '84.7', '65.2', '65.6', '90.4', '73.3', '85.9', '34.8', '83.0', '50.0', '77.7', '81.7', '79.2', '82.3', '53.9', '78.5', '49.9', '85.6', '67.2']\nmean IoU: 71.6\n\n[epoch: 3]\ntrain_loss: 0.4358\nlr: 0.000029\nglobal correct: 93.1\naverage row correct: ['95.8', '93.4', '76.0', '92.3', '83.2', '78.1', '94.0', '86.3', '93.0', '50.9', '91.1', '62.9', '88.0', '90.9', '90.4', '89.6', '71.6', '87.0', '72.4', '92.4', '78.5']\nIoU: ['92.5', '86.0', '35.3', '85.1', '66.1', '66.9', '89.8', '73.3', '85.9', '34.8', '83.0', '50.4', '78.0', '81.5', '79.0', '82.1', '54.1', '78.6', '50.0', '85.6', '67.1']\nmean IoU: 71.7\n\n[epoch: 4]\ntrain_loss: 0.3886\nlr: 0.000000\nglobal correct: 93.1\naverage row correct: ['95.6', '93.8', '76.0', '92.8', '83.6', '77.9', '94.2', '86.1', '93.5', '50.9', 
'92.0', '63.8', '88.8', '91.4', '90.6', '89.4', '73.2', '87.4', '73.0', '92.4', '78.9']\nIoU: ['92.5', '86.0', '35.3', '84.4', '66.2', '66.5', '89.9', '73.2', '85.9', '34.6', '83.2', '50.8', '78.0', '81.4', '78.6', '82.0', '53.6', '78.4', '50.1', '85.7', '66.6']\nmean IoU: 71.6\n\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/src/__init__.py",
    "content": "from .lraspp_model import lraspp_mobilenetv3_large\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/src/lraspp_model.py",
    "content": "from collections import OrderedDict\n\nfrom typing import Dict\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom .mobilenet_backbone import mobilenet_v3_large\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n\n    Args:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    _version = 2\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n\n        # 重新构建backbone，将没有使用到的模块全部删掉\n        layers = OrderedDict()\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super(IntermediateLayerGetter, 
self).__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        out = OrderedDict()\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass LRASPP(nn.Module):\n    \"\"\"\n    Implements a Lite R-ASPP Network for semantic segmentation from\n    `\"Searching for MobileNetV3\"\n    <https://arxiv.org/abs/1905.02244>`_.\n\n    Args:\n        backbone (nn.Module): the network used to compute the features for the model.\n            The backbone should return an OrderedDict[Tensor], with the key being\n            \"high\" for the high level feature map and \"low\" for the low level feature map.\n        low_channels (int): the number of channels of the low level features.\n        high_channels (int): the number of channels of the high level features.\n        num_classes (int): number of output classes of the model (including the background).\n        inter_channels (int, optional): the number of channels for intermediate computations.\n    \"\"\"\n    __constants__ = ['aux_classifier']\n\n    def __init__(self,\n                 backbone: nn.Module,\n                 low_channels: int,\n                 high_channels: int,\n                 num_classes: int,\n                 inter_channels: int = 128) -> None:\n        super(LRASPP, self).__init__()\n        self.backbone = backbone\n        self.classifier = LRASPPHead(low_channels, high_channels, num_classes, inter_channels)\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        input_shape = x.shape[-2:]\n        features = self.backbone(x)\n        out = self.classifier(features)\n        out = F.interpolate(out, size=input_shape, mode=\"bilinear\", align_corners=False)\n\n        result = OrderedDict()\n        result[\"out\"] = out\n\n        return 
result\n\n\nclass LRASPPHead(nn.Module):\n    def __init__(self,\n                 low_channels: int,\n                 high_channels: int,\n                 num_classes: int,\n                 inter_channels: int) -> None:\n        super(LRASPPHead, self).__init__()\n        self.cbr = nn.Sequential(\n            nn.Conv2d(high_channels, inter_channels, 1, bias=False),\n            nn.BatchNorm2d(inter_channels),\n            nn.ReLU(inplace=True)\n        )\n        self.scale = nn.Sequential(\n            nn.AdaptiveAvgPool2d(1),\n            nn.Conv2d(high_channels, inter_channels, 1, bias=False),\n            nn.Sigmoid()\n        )\n        self.low_classifier = nn.Conv2d(low_channels, num_classes, 1)\n        self.high_classifier = nn.Conv2d(inter_channels, num_classes, 1)\n\n    def forward(self, inputs: Dict[str, Tensor]) -> Tensor:\n        low = inputs[\"low\"]\n        high = inputs[\"high\"]\n\n        x = self.cbr(high)\n        s = self.scale(high)\n        x = x * s\n        x = F.interpolate(x, size=low.shape[-2:], mode=\"bilinear\", align_corners=False)\n\n        return self.low_classifier(low) + self.high_classifier(x)\n\n\ndef lraspp_mobilenetv3_large(num_classes=21, pretrain_backbone=False):\n    # 'mobilenetv3_large_imagenet': 'https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth'\n    # 'lraspp_mobilenet_v3_large_coco': 'https://download.pytorch.org/models/lraspp_mobilenet_v3_large-d234d4ea.pth'\n    backbone = mobilenet_v3_large(dilated=True)\n\n    if pretrain_backbone:\n        # 载入mobilenetv3 large backbone预训练权重\n        backbone.load_state_dict(torch.load(\"mobilenet_v3_large.pth\", map_location='cpu'))\n\n    backbone = backbone.features\n\n    # Gather the indices of blocks which are strided. 
These are the locations of C1, ..., Cn-1 blocks.\n    # The first and last blocks are always included because they are the C0 (conv1) and Cn.\n    stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, \"is_strided\", False)] + [len(backbone) - 1]\n    low_pos = stage_indices[-4]  # use C2 here which has output_stride = 8\n    high_pos = stage_indices[-1]  # use C5 which has output_stride = 16\n    low_channels = backbone[low_pos].out_channels\n    high_channels = backbone[high_pos].out_channels\n\n    return_layers = {str(low_pos): \"low\", str(high_pos): \"high\"}\n    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n    model = LRASPP(backbone, low_channels, high_channels, num_classes)\n    return model\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/src/mobilenet_backbone.py",
    "content": "from typing import Callable, List, Optional\n\nimport torch\nfrom torch import nn, Tensor\nfrom torch.nn import functional as F\nfrom functools import partial\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNActivation(nn.Sequential):\n    def __init__(self,\n                 in_planes: int,\n                 out_planes: int,\n                 kernel_size: int = 3,\n                 stride: int = 1,\n                 groups: int = 1,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None,\n                 activation_layer: Optional[Callable[..., nn.Module]] = None,\n                 dilation: int = 1):\n        padding = (kernel_size - 1) // 2 * dilation\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if activation_layer is None:\n            activation_layer = nn.ReLU6\n        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,\n                                                         out_channels=out_planes,\n                                                         kernel_size=kernel_size,\n                                                         stride=stride,\n                                                         dilation=dilation,\n                                                         padding=padding,\n                                                         groups=groups,\n                                            
             bias=False),\n                                               norm_layer(out_planes),\n                                               activation_layer(inplace=True))\n        self.out_channels = out_planes\n\n\nclass SqueezeExcitation(nn.Module):\n    def __init__(self, input_c: int, squeeze_factor: int = 4):\n        super(SqueezeExcitation, self).__init__()\n        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)\n        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)\n        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)\n\n    def forward(self, x: Tensor) -> Tensor:\n        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))\n        scale = self.fc1(scale)\n        scale = F.relu(scale, inplace=True)\n        scale = self.fc2(scale)\n        scale = F.hardsigmoid(scale, inplace=True)\n        return scale * x\n\n\nclass InvertedResidualConfig:\n    def __init__(self,\n                 input_c: int,\n                 kernel: int,\n                 expanded_c: int,\n                 out_c: int,\n                 use_se: bool,\n                 activation: str,\n                 stride: int,\n                 dilation: int,\n                 width_multi: float):\n        self.input_c = self.adjust_channels(input_c, width_multi)\n        self.kernel = kernel\n        self.expanded_c = self.adjust_channels(expanded_c, width_multi)\n        self.out_c = self.adjust_channels(out_c, width_multi)\n        self.use_se = use_se\n        self.use_hs = activation == \"HS\"  # whether using h-swish activation\n        self.stride = stride\n        self.dilation = dilation\n\n    @staticmethod\n    def adjust_channels(channels: int, width_multi: float):\n        return _make_divisible(channels * width_multi, 8)\n\n\nclass InvertedResidual(nn.Module):\n    def __init__(self,\n                 cnf: InvertedResidualConfig,\n                 norm_layer: Callable[..., nn.Module]):\n        super(InvertedResidual, self).__init__()\n\n        if cnf.stride not in 
[1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)\n\n        layers: List[nn.Module] = []\n        activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU\n\n        # expand\n        if cnf.expanded_c != cnf.input_c:\n            layers.append(ConvBNActivation(cnf.input_c,\n                                           cnf.expanded_c,\n                                           kernel_size=1,\n                                           norm_layer=norm_layer,\n                                           activation_layer=activation_layer))\n\n        # depthwise\n        stride = 1 if cnf.dilation > 1 else cnf.stride\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.expanded_c,\n                                       kernel_size=cnf.kernel,\n                                       stride=stride,\n                                       dilation=cnf.dilation,\n                                       groups=cnf.expanded_c,\n                                       norm_layer=norm_layer,\n                                       activation_layer=activation_layer))\n\n        if cnf.use_se:\n            layers.append(SqueezeExcitation(cnf.expanded_c))\n\n        # project\n        layers.append(ConvBNActivation(cnf.expanded_c,\n                                       cnf.out_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Identity))\n\n        self.block = nn.Sequential(*layers)\n        self.out_channels = cnf.out_c\n        self.is_strided = cnf.stride > 1\n\n    def forward(self, x: Tensor) -> Tensor:\n        result = self.block(x)\n        if self.use_res_connect:\n            result += x\n\n        return result\n\n\nclass MobileNetV3(nn.Module):\n    def __init__(self,\n                 
inverted_residual_setting: List[InvertedResidualConfig],\n                 last_channel: int,\n                 num_classes: int = 1000,\n                 block: Optional[Callable[..., nn.Module]] = None,\n                 norm_layer: Optional[Callable[..., nn.Module]] = None):\n        super(MobileNetV3, self).__init__()\n\n        if not inverted_residual_setting:\n            raise ValueError(\"The inverted_residual_setting should not be empty.\")\n        elif not (isinstance(inverted_residual_setting, List) and\n                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):\n            raise TypeError(\"The inverted_residual_setting should be List[InvertedResidualConfig]\")\n\n        if block is None:\n            block = InvertedResidual\n\n        if norm_layer is None:\n            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)\n\n        layers: List[nn.Module] = []\n\n        # building first layer\n        firstconv_output_c = inverted_residual_setting[0].input_c\n        layers.append(ConvBNActivation(3,\n                                       firstconv_output_c,\n                                       kernel_size=3,\n                                       stride=2,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n        # building inverted residual blocks\n        for cnf in inverted_residual_setting:\n            layers.append(block(cnf, norm_layer))\n\n        # building last several layers\n        lastconv_input_c = inverted_residual_setting[-1].out_c\n        lastconv_output_c = 6 * lastconv_input_c\n        layers.append(ConvBNActivation(lastconv_input_c,\n                                       lastconv_output_c,\n                                       kernel_size=1,\n                                       norm_layer=norm_layer,\n                                       activation_layer=nn.Hardswish))\n      
  self.features = nn.Sequential(*layers)\n        self.avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),\n                                        nn.Hardswish(inplace=True),\n                                        nn.Dropout(p=0.2, inplace=True),\n                                        nn.Linear(last_channel, num_classes))\n\n        # initial weights\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(m.weight, mode=\"fan_out\")\n                if m.bias is not None:\n                    nn.init.zeros_(m.bias)\n            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):\n                nn.init.ones_(m.weight)\n                nn.init.zeros_(m.bias)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                nn.init.zeros_(m.bias)\n\n    def _forward_impl(self, x: Tensor) -> Tensor:\n        x = self.features(x)\n        x = self.avgpool(x)\n        x = torch.flatten(x, 1)\n        x = self.classifier(x)\n\n        return x\n\n    def forward(self, x: Tensor) -> Tensor:\n        return self._forward_impl(x)\n\n\ndef mobilenet_v3_large(num_classes: int = 1000,\n                       reduced_tail: bool = False,\n                       dilated: bool = False) -> MobileNetV3:\n    \"\"\"\n    Constructs a large MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. 
It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n        dilated: whether using dilated conv\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n    dilation = 2 if dilated else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation\n        bneck_conf(16, 3, 16, 16, False, \"RE\", 1, 1),\n        bneck_conf(16, 3, 64, 24, False, \"RE\", 2, 1),  # C1\n        bneck_conf(24, 3, 72, 24, False, \"RE\", 1, 1),\n        bneck_conf(24, 5, 72, 40, True, \"RE\", 2, 1),  # C2\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1, 1),\n        bneck_conf(40, 5, 120, 40, True, \"RE\", 1, 1),\n        bneck_conf(40, 3, 240, 80, False, \"HS\", 2, 1),  # C3\n        bneck_conf(80, 3, 200, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 184, 80, False, \"HS\", 1, 1),\n        bneck_conf(80, 3, 480, 112, True, \"HS\", 1, 1),\n        bneck_conf(112, 3, 672, 112, True, \"HS\", 1, 1),\n        bneck_conf(112, 5, 672, 160 // reduce_divider, True, \"HS\", 2, dilation),  # C4\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1, dilation),\n        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, \"HS\", 1, dilation),\n    ]\n    last_channel = adjust_channels(1280 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n                       num_classes=num_classes)\n\n\ndef mobilenet_v3_small(num_classes: int = 1000,\n                       reduced_tail: bool = False,\n                       dilated: bool = 
False) -> MobileNetV3:\n    \"\"\"\n    Constructs a small MobileNetV3 architecture from\n    \"Searching for MobileNetV3\" <https://arxiv.org/abs/1905.02244>.\n\n    weights_link:\n    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth\n\n    Args:\n        num_classes (int): number of classes\n        reduced_tail (bool): If True, reduces the channel counts of all feature layers\n            between C4 and C5 by 2. It is used to reduce the channel redundancy in the\n            backbone for Detection and Segmentation.\n        dilated: whether using dilated conv\n    \"\"\"\n    width_multi = 1.0\n    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)\n    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)\n\n    reduce_divider = 2 if reduced_tail else 1\n    dilation = 2 if dilated else 1\n\n    inverted_residual_setting = [\n        # input_c, kernel, expanded_c, out_c, use_se, activation, stride, dilation\n        bneck_conf(16, 3, 16, 16, True, \"RE\", 2, 1),  # C1\n        bneck_conf(16, 3, 72, 24, False, \"RE\", 2, 1),  # C2\n        bneck_conf(24, 3, 88, 24, False, \"RE\", 1, 1),\n        bneck_conf(24, 5, 96, 40, True, \"HS\", 2, 1),  # C3\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1, 1),\n        bneck_conf(40, 5, 240, 40, True, \"HS\", 1, 1),\n        bneck_conf(40, 5, 120, 48, True, \"HS\", 1, 1),\n        bneck_conf(48, 5, 144, 48, True, \"HS\", 1, 1),\n        bneck_conf(48, 5, 288, 96 // reduce_divider, True, \"HS\", 2, dilation),  # C4\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1, dilation),\n        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, \"HS\", 1, dilation)\n    ]\n    last_channel = adjust_channels(1024 // reduce_divider)  # C5\n\n    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,\n                       last_channel=last_channel,\n 
                      num_classes=num_classes)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/train.py",
    "content": "import os\nimport time\nimport datetime\n\nimport torch\n\nfrom src import lraspp_mobilenetv3_large\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(num_classes, pretrain=True):\n    model = lraspp_mobilenetv3_large(num_classes=num_classes)\n\n    if pretrain:\n        weights_dict = torch.load(\"./lraspp_mobilenet_v3_large.pth\", map_location='cpu')\n\n        if num_classes != 21:\n            # 官方提供的预训练权重是21类(包括背景)\n            # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n            for k in list(weights_dict.keys()):\n                if \"low_classifier\" in k or \"high_classifier\" in k:\n                    del weights_dict[k]\n\n        
missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n        if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n            print(\"missing_keys: \", missing_keys)\n            print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    batch_size = args.batch_size\n    # segmentation num_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               num_workers=num_workers,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n   
                                          collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=num_classes)\n    model.to(device)\n\n    params_to_optimize = [\n        {\"params\": [p for p in model.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model.classifier.parameters() if p.requires_grad]}\n    ]\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay\n    )\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)\n\n    if args.resume:\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 记录每个epoch对应的train_loss、lr以及验证集各指标\n            train_info = f\"[epoch: {epoch}]\\n\" \\\n                         f\"train_loss: {mean_loss:.4f}\\n\" \\\n                         f\"lr: {lr:.6f}\\n\"\n            f.write(train_info + val_info + \"\\n\\n\")\n\n        save_file = {\"model\": model.state_dict(),\n                     \"optimizer\": 
optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     \"epoch\": epoch,\n                     \"args\": args}\n        if args.amp:\n            save_file[\"scaler\"] = scaler.state_dict()\n        torch.save(save_file, \"save_weights/model_{}.pth\".format(epoch))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print(\"training time {}\".format(total_time_str))\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch lraspp training\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument(\"-b\", \"--batch-size\", default=4, type=int)\n    parser.add_argument(\"--epochs\", default=30, type=int, metavar=\"N\",\n                        help=\"number of total epochs to train\")\n\n    parser.add_argument('--lr', default=0.0001, type=float, help='initial learning rate')\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n                        help='start epoch')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    
return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nfrom src import lraspp_mobilenetv3_large\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(2.0 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train):\n    base_size = 520\n    crop_size = 480\n\n    return SegmentationPresetTrain(base_size, crop_size) if train else SegmentationPresetEval(base_size)\n\n\ndef create_model(num_classes):\n    model = lraspp_mobilenetv3_large(num_classes=num_classes)\n    weights_dict = torch.load(\"./deeplabv3_resnet50_coco.pth\", map_location='cpu')\n\n    if num_classes != 21:\n        # 官方提供的预训练权重是21类(包括背景)\n        # 如果训练自己的数据集，将和类别相关的权重删除，防止权重shape不一致报错\n        for k in list(weights_dict.keys()):\n            if \"low_classifier\" in k or \"high_classifier\" in k:\n                del weights_dict[k]\n\n    missing_keys, 
unexpected_keys = model.load_state_dict(weights_dict, strict=False)\n    if len(missing_keys) != 0 or len(unexpected_keys) != 0:\n        print(\"missing_keys: \", missing_keys)\n        print(\"unexpected_keys: \", unexpected_keys)\n\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n    # segmentation num_classes + background\n    num_classes = args.num_classes + 1\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    VOC_root = args.data_path\n    # check voc root\n    if os.path.exists(os.path.join(VOC_root, \"VOCdevkit\")) is False:\n        raise FileNotFoundError(\"VOCdevkit dose not in path:'{}'.\".format(VOC_root))\n\n    # load train data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> train.txt\n    train_dataset = VOCSegmentation(args.data_path,\n                                    year=\"2012\",\n                                    transforms=get_transform(train=True),\n                                    txt_name=\"train.txt\")\n    # load validation data set\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=get_transform(train=False),\n                                  txt_name=\"val.txt\")\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    train_data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_size=args.batch_size,\n        sampler=train_sampler, 
num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn, drop_last=True)\n\n    val_data_loader = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + 20 classes\n    model = create_model(num_classes=num_classes)\n    model.to(device)\n\n    if args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params_to_optimize = [\n        {\"params\": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]},\n        {\"params\": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]},\n    ]\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            
scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        return\n\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                train_info = f\"[epoch: {epoch}]\\n\" \\\n                             f\"train_loss: {mean_loss:.4f}\\n\" \\\n                             f\"lr: {lr:.6f}\\n\"\n                f.write(train_info + val_info + \"\\n\\n\")\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_file = {'model': model_without_ddp.state_dict(),\n                         'optimizer': optimizer.state_dict(),\n                         'lr_scheduler': lr_scheduler.state_dict(),\n                         'args': args,\n                         'epoch': epoch}\n            if args.amp:\n                save_file[\"scaler\"] = scaler.state_dict()\n            save_on_master(save_file,\n                           os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n\nif __name__ == \"__main__\":\n    import 
argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='/data/', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    parser.add_argument('--num-classes', default=20, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=20, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 是否使用同步BN(在多个GPU之间同步)，默认不开启，开启后训练速度会变慢\n    parser.add_argument('--sync_bn', type=bool, default=False, help='whether using SyncBatchNorm')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 训练学习率，这里默认设置成0.0001，如果效果不好可以尝试加大学习率\n    parser.add_argument('--lr', default=0.0001, type=float,\n                        help='initial learning rate')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    # 不训练，仅测试\n    
parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 分布式进程数\n    parser.add_argument('--world-size', default=1, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/train_utils/__init__.py",
    "content": "from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport time\nimport torch\nimport torch.distributed as dist\n\nimport errno\nimport os\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\nclass ConfusionMatrix(object):\n    def __init__(self, num_classes):\n        self.num_classes = num_classes\n        self.mat = None\n\n    def update(self, a, 
b):\n        n = self.num_classes\n        if self.mat is None:\n            # 创建混淆矩阵\n            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)\n        with torch.no_grad():\n            # 寻找GT中为目标的像素索引\n            k = (a >= 0) & (a < n)\n            # 统计像素真实类别a[k]被预测成类别b[k]的个数(这里的做法很巧妙)\n            inds = n * a[k].to(torch.int64) + b[k]\n            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)\n\n    def reset(self):\n        if self.mat is not None:\n            self.mat.zero_()\n\n    def compute(self):\n        h = self.mat.float()\n        # 计算全局预测准确率(混淆矩阵的对角线为预测正确的个数)\n        acc_global = torch.diag(h).sum() / h.sum()\n        # 计算每个类别的准确率\n        acc = torch.diag(h) / h.sum(1)\n        # 计算每个类别预测与真实目标的iou\n        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))\n        return acc_global, acc, iu\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.mat)\n\n    def __str__(self):\n        acc_global, acc, iu = self.compute()\n        return (\n            'global correct: {:.1f}\\n'\n            'average row correct: {}\\n'\n            'IoU: {}\\n'\n            'mean IoU: {:.1f}').format(\n                acc_global.item() * 100,\n                ['{:.1f}'.format(i) for i in (acc * 100).tolist()],\n                ['{:.1f}'.format(i) for i in (iu * 100).tolist()],\n                iu.mean().item() * 100)\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            
self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = ''\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}',\n                'max mem: {memory:.0f}'\n            ])\n        else:\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}'\n            ])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0:\n                eta_seconds = 
iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))\n                if torch.cuda.is_available():\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time),\n                        memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {}'.format(header, total_time_str))\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n 
       torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    elif hasattr(args, \"rank\"):\n        pass\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/train_utils/train_and_eval.py",
    "content": "import torch\nfrom torch import nn\nimport train_utils.distributed_utils as utils\n\n\ndef criterion(inputs, target):\n    losses = {}\n    for name, x in inputs.items():\n        # 忽略target中值为255的像素，255的像素是目标边缘或者padding填充\n        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)\n\n    if len(losses) == 1:\n        return losses['out']\n\n    return losses['out'] + 0.5 * losses['aux']\n\n\ndef evaluate(model, data_loader, device, num_classes):\n    model.eval()\n    confmat = utils.ConfusionMatrix(num_classes)\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = 'Test:'\n    with torch.no_grad():\n        for image, target in metric_logger.log_every(data_loader, 100, header):\n            image, target = image.to(device), target.to(device)\n            output = model(image)\n            output = output['out']\n\n            confmat.update(target.flatten(), output.argmax(1).flatten())\n\n        confmat.reduce_from_all_processes()\n\n    return confmat\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    for image, target in metric_logger.log_every(data_loader, print_freq, header):\n        image, target = image.to(device), target.to(device)\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            output = model(image)\n            loss = criterion(output, target)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(loss).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            loss.backward()\n            optimizer.step()\n\n        lr_scheduler.step()\n\n        lr = optimizer.param_groups[0][\"lr\"]\n        
metric_logger.update(loss=loss.item(), lr=lr)\n\n    return metric_logger.meters[\"loss\"].global_avg, lr\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            # warmup后lr倍率因子从1 -> 0\n            # 参考deeplab_v2: Learning rate policy\n            return (1 - (x - warmup_epochs * num_step) / ((epochs - warmup_epochs) * num_step)) ** 0.9\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/transforms.py",
    "content": "import numpy as np\nimport random\n\nimport torch\nfrom torchvision import transforms as T\nfrom torchvision.transforms import functional as F\n\n\ndef pad_if_smaller(img, size, fill=0):\n    # 如果图像最小边长小于给定size，则用数值fill进行padding\n    min_size = min(img.size)\n    if min_size < size:\n        ow, oh = img.size\n        padh = size - oh if oh < size else 0\n        padw = size - ow if ow < size else 0\n        img = F.pad(img, (0, 0, padw, padh), fill=fill)\n    return img\n\n\nclass Compose(object):\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass RandomResize(object):\n    def __init__(self, min_size, max_size=None):\n        self.min_size = min_size\n        if max_size is None:\n            max_size = min_size\n        self.max_size = max_size\n\n    def __call__(self, image, target):\n        size = random.randint(self.min_size, self.max_size)\n        # 这里size传入的是int类型，所以是将图像的最小边长缩放到size大小\n        image = F.resize(image, size)\n        # 这里的interpolation注意下，在torchvision(0.9.0)以后才有InterpolationMode.NEAREST\n        # 如果是之前的版本需要使用PIL.Image.NEAREST\n        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    def __init__(self, flip_prob):\n        self.flip_prob = flip_prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.hflip(image)\n            target = F.hflip(target)\n        return image, target\n\n\nclass RandomCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = pad_if_smaller(image, self.size)\n        target = pad_if_smaller(target, self.size, fill=255)\n        crop_params = T.RandomCrop.get_params(image, (self.size, 
self.size))\n        image = F.crop(image, *crop_params)\n        target = F.crop(target, *crop_params)\n        return image, target\n\n\nclass CenterCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = F.center_crop(image, self.size)\n        target = F.center_crop(target, self.size)\n        return image, target\n\n\nclass ToTensor(object):\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        target = torch.as_tensor(np.array(target), dtype=torch.int64)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean, std):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n"
  },
  {
    "path": "pytorch_segmentation/lraspp/validation.py",
    "content": "import os\nimport torch\n\nfrom src import lraspp_mobilenetv3_large\nfrom train_utils import evaluate\nfrom my_dataset import VOCSegmentation\nimport transforms as T\n\n\nclass SegmentationPresetEval:\n    def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.RandomResize(base_size, base_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    assert os.path.exists(args.weights), f\"weights {args.weights} not found.\"\n\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # VOCdevkit -> VOC2012 -> ImageSets -> Segmentation -> val.txt\n    val_dataset = VOCSegmentation(args.data_path,\n                                  year=\"2012\",\n                                  transforms=SegmentationPresetEval(520),\n                                  txt_name=\"val.txt\")\n\n    num_workers = 8\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = lraspp_mobilenetv3_large(num_classes=num_classes)\n    model.load_state_dict(torch.load(args.weights, map_location=device)['model'])\n    model.to(device)\n\n    confmat = evaluate(model, val_loader, device=device, num_classes=num_classes)\n    print(confmat)\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch lraspp validation\")\n\n    parser.add_argument(\"--data-path\", default=\"/data/\", help=\"VOCdevkit root\")\n    
parser.add_argument(\"--weights\", default=\"./save_weights/model_29.pth\")\n    parser.add_argument(\"--num-classes\", default=20, type=int)\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/u2net/README.md",
    "content": "# U2-Net(Going Deeper with Nested U-Structure for Salient Object Detection)\n\n## 该项目主要是来自官方的源码\n- https://github.com/xuebinqin/U-2-Net\n- 注意，该项目是针对显著性目标检测领域（Salient Object Detection / SOD）\n\n## 环境配置：\n- Python3.6/3.7/3.8\n- Pytorch1.10\n- Ubuntu或Centos(Windows暂不支持多GPU训练)\n- 建议使用GPU训练\n- 详细环境配置见`requirements.txt`\n\n\n## 文件结构\n```\n├── src: 搭建网络相关代码\n├── train_utils: 训练以及验证相关代码\n├── my_dataset.py: 自定义数据集读取相关代码\n├── predict.py: 简易的预测代码\n├── train.py: 单GPU或CPU训练代码\n├── train_multi_GPU.py: 多GPU并行训练代码\n├── validation.py: 单独验证模型相关代码\n├── transforms.py: 数据预处理相关代码\n└── requirements.txt: 项目依赖\n```\n\n## DUTS数据集准备\n- DUTS数据集官方下载地址：[http://saliencydetection.net/duts/](http://saliencydetection.net/duts/)\n- 如果下载不了，可以通过我提供的百度云下载，链接: https://pan.baidu.com/s/1nBI6GTN0ZilqH4Tvu18dow  密码: r7k6\n- 其中DUTS-TR为训练集，DUTS-TE是测试（验证）集，数据集解压后目录结构如下：\n```\n├── DUTS-TR\n│      ├── DUTS-TR-Image: 该文件夹存放所有训练集的图片\n│      └── DUTS-TR-Mask: 该文件夹存放对应训练图片的GT标签（Mask蒙板形式）\n│\n└── DUTS-TE\n       ├── DUTS-TE-Image: 该文件夹存放所有测试（验证）集的图片\n       └── DUTS-TE-Mask: 该文件夹存放对应测试（验证）图片的GT标签（Mask蒙板形式）\n```\n- 注意训练或者验证过程中，将`--data-path`指向`DUTS-TR`所在根目录\n\n## 官方权重\n从官方转换得到的权重：\n- `u2net_full.pth`下载链接: https://pan.baidu.com/s/1ojJZS8v3F_eFKkF3DEdEXA  密码: fh1v\n- `u2net_lite.pth`下载链接: https://pan.baidu.com/s/1TIWoiuEz9qRvTX9quDqQHg  密码: 5stj\n\n`u2net_full`在DUTS-TE上的验证结果(使用`validation.py`进行验证)：\n```\nMAE: 0.044\nmaxF1: 0.868\n```\n**注：**\n- 这里的maxF1和原论文中的结果有些差异，经过对比发现差异主要来自post_norm，原仓库中会对预测结果进行post_norm，但在本仓库中将post_norm给移除了。\n如果加上post_norm这里的maxF1为`0.872`，如果需要做该后处理可自行添加，post_norm流程如下，其中output为验证时网络预测的输出：\n```python\nma = torch.max(output)\nmi = torch.min(output)\noutput = (output - mi) / (ma - mi)\n```\n- 如果要载入官方提供的权重，需要将`src/model.py`中`ConvBNReLU`类里卷积的bias设置成True，因为官方代码里没有进行设置（Conv2d的bias默认为True）。\n因为卷积后跟了BN，所以bias是起不到作用的，所以在本仓库中默认将bias设置为False。\n\n## 训练记录(`u2net_full`)\n训练指令：\n```\ntorchrun --nproc_per_node=4 train_multi_GPU.py --lr 0.004 --amp\n```\n训练最终在DUTS-TE上的验证结果：\n```\nMAE: 0.047\nmaxF1: 
0.859\n```\n训练过程详情可见results.txt文件，训练权重下载链接: https://pan.baidu.com/s/1df2jMkrjbgEv-r1NMaZCZg  密码: n4l6\n\n## 训练方法\n* 确保提前准备好数据集\n* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令,`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`\n\n## 如果对U2Net网络不了解的可参考我的bilibili\n- [https://www.bilibili.com/video/BV1yB4y1z7m](https://www.bilibili.com/video/BV1yB4y1z7m)\n\n## 进一步了解该项目，以及对U2Net代码的分析可参考我的bilibili\n- [https://www.bilibili.com/video/BV1Kt4y137iS](https://www.bilibili.com/video/BV1Kt4y137iS)\n\n## U2NET网络结构\n![u2net](./u2net.png)"
  },
  {
    "path": "pytorch_segmentation/u2net/convert_weight.py",
    "content": "import re\nimport torch\nfrom src import u2net_full, u2net_lite\n\nlayers = {\"encode\": [7, 6, 5, 4, 4, 4],\n          \"decode\": [4, 4, 5, 6, 7]}\n\n\ndef convert_conv_bn(new_weight, prefix, ks, v):\n    if \"conv\" in ks[0]:\n        if \"weight\" == ks[1]:\n            new_weight[prefix + \".conv.weight\"] = v\n        elif \"bias\" == ks[1]:\n            new_weight[prefix + \".conv.bias\"] = v\n        else:\n            print(f\"unrecognized weight {prefix + ks[1]}\")\n        return\n\n    if \"bn\" in ks[0]:\n        if \"running_mean\" == ks[1]:\n            new_weight[prefix + \".bn.running_mean\"] = v\n        elif \"running_var\" == ks[1]:\n            new_weight[prefix + \".bn.running_var\"] = v\n        elif \"weight\" == ks[1]:\n            new_weight[prefix + \".bn.weight\"] = v\n        elif \"bias\" == ks[1]:\n            new_weight[prefix + \".bn.bias\"] = v\n        elif \"num_batches_tracked\" == ks[1]:\n            return\n        else:\n            print(f\"unrecognized weight {prefix + ks[1]}\")\n        return\n\n\ndef convert(old_weight: dict):\n    new_weight = {}\n    for k, v in old_weight.items():\n        ks = k.split(\".\")\n        if (\"stage\" in ks[0]) and (\"d\" not in ks[0]):\n            # encode stage\n            num = int(re.findall(r'\\d', ks[0])[0]) - 1\n            prefix = f\"encode_modules.{num}\"\n            if \"rebnconvin\" == ks[1]:\n                # ConvBNReLU module\n                prefix += \".conv_in\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            elif (\"rebnconv\" in ks[1]) and (\"d\" not in ks[1]):\n                num_ = int(re.findall(r'\\d', ks[1])[0]) - 1\n                prefix += f\".encode_modules.{num_}\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            elif (\"rebnconv\" in ks[1]) and (\"d\" in ks[1]):\n                num_ = layers[\"encode\"][num] - int(re.findall(r'\\d', ks[1])[0]) - 1\n                prefix += 
f\".decode_modules.{num_}\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            else:\n                print(f\"unrecognized key: {k}\")\n\n        elif (\"stage\" in ks[0]) and (\"d\" in ks[0]):\n            # decode stage\n            num = 5 - int(re.findall(r'\\d', ks[0])[0])\n            prefix = f\"decode_modules.{num}\"\n            if \"rebnconvin\" == ks[1]:\n                # ConvBNReLU module\n                prefix += \".conv_in\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            elif (\"rebnconv\" in ks[1]) and (\"d\" not in ks[1]):\n                num_ = int(re.findall(r'\\d', ks[1])[0]) - 1\n                prefix += f\".encode_modules.{num_}\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            elif (\"rebnconv\" in ks[1]) and (\"d\" in ks[1]):\n                num_ = layers[\"decode\"][num] - int(re.findall(r'\\d', ks[1])[0]) - 1\n                prefix += f\".decode_modules.{num_}\"\n                convert_conv_bn(new_weight, prefix, ks[2:], v)\n            else:\n                print(f\"unrecognized key: {k}\")\n        elif \"side\" in ks[0]:\n            # side\n            num = 6 - int(re.findall(r'\\d', ks[0])[0])\n            prefix = f\"side_modules.{num}\"\n            if \"weight\" == ks[1]:\n                new_weight[prefix + \".weight\"] = v\n            elif \"bias\" == ks[1]:\n                new_weight[prefix + \".bias\"] = v\n            else:\n                print(f\"unrecognized weight {prefix + ks[1]}\")\n        elif \"outconv\" in ks[0]:\n            prefix = f\"out_conv\"\n            if \"weight\" == ks[1]:\n                new_weight[prefix + \".weight\"] = v\n            elif \"bias\" == ks[1]:\n                new_weight[prefix + \".bias\"] = v\n            else:\n                print(f\"unrecognized weight {prefix + ks[1]}\")\n        else:\n            print(f\"unrecognized key: {k}\")\n\n    return new_weight\n\n\ndef main_1():\n    from 
u2net import U2NET, U2NETP\n\n    old_m = U2NET()\n    old_m.load_state_dict(torch.load(\"u2net.pth\", map_location='cpu'))\n    new_m = u2net_full()\n\n    # old_m = U2NETP()\n    # old_m.load_state_dict(torch.load(\"u2netp.pth\", map_location='cpu'))\n    # new_m = u2net_lite()\n\n    old_w = old_m.state_dict()\n\n    w = convert(old_w)\n    new_m.load_state_dict(w, strict=True)\n\n    torch.random.manual_seed(0)\n    x = torch.randn(1, 3, 288, 288)\n    old_m.eval()\n    new_m.eval()\n    with torch.no_grad():\n        out1 = old_m(x)[0]\n        out2 = new_m(x)\n        assert torch.equal(out1, out2)\n        torch.save(new_m.state_dict(), \"u2net_full.pth\")\n\n\ndef main():\n    old_w = torch.load(\"u2net.pth\", map_location='cpu')\n    new_m = u2net_full()\n\n    # old_w = torch.load(\"u2netp.pth\", map_location='cpu')\n    # new_m = u2net_lite()\n\n    w = convert(old_w)\n    new_m.load_state_dict(w, strict=True)\n    torch.save(new_m.state_dict(), \"u2net_full.pth\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/u2net/my_dataset.py",
    "content": "import os\n\nimport cv2\nimport torch.utils.data as data\n\n\nclass DUTSDataset(data.Dataset):\n    def __init__(self, root: str, train: bool = True, transforms=None):\n        assert os.path.exists(root), f\"path '{root}' does not exist.\"\n        if train:\n            self.image_root = os.path.join(root, \"DUTS-TR\", \"DUTS-TR-Image\")\n            self.mask_root = os.path.join(root, \"DUTS-TR\", \"DUTS-TR-Mask\")\n        else:\n            self.image_root = os.path.join(root, \"DUTS-TE\", \"DUTS-TE-Image\")\n            self.mask_root = os.path.join(root, \"DUTS-TE\", \"DUTS-TE-Mask\")\n        assert os.path.exists(self.image_root), f\"path '{self.image_root}' does not exist.\"\n        assert os.path.exists(self.mask_root), f\"path '{self.mask_root}' does not exist.\"\n\n        image_names = [p for p in os.listdir(self.image_root) if p.endswith(\".jpg\")]\n        mask_names = [p for p in os.listdir(self.mask_root) if p.endswith(\".png\")]\n        assert len(image_names) > 0, f\"not find any images in {self.image_root}.\"\n\n        # check images and mask\n        re_mask_names = []\n        for p in image_names:\n            mask_name = p.replace(\".jpg\", \".png\")\n            assert mask_name in mask_names, f\"{p} has no corresponding mask.\"\n            re_mask_names.append(mask_name)\n        mask_names = re_mask_names\n\n        self.images_path = [os.path.join(self.image_root, n) for n in image_names]\n        self.masks_path = [os.path.join(self.mask_root, n) for n in mask_names]\n\n        self.transforms = transforms\n\n    def __getitem__(self, idx):\n        image_path = self.images_path[idx]\n        mask_path = self.masks_path[idx]\n        image = cv2.imread(image_path, flags=cv2.IMREAD_COLOR)\n        assert image is not None, f\"failed to read image: {image_path}\"\n        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB\n        h, w, _ = image.shape\n\n        target = cv2.imread(mask_path, 
flags=cv2.IMREAD_GRAYSCALE)\n        assert target is not None, f\"failed to read mask: {mask_path}\"\n\n        if self.transforms is not None:\n            image, target = self.transforms(image, target)\n\n        return image, target\n\n    def __len__(self):\n        return len(self.images_path)\n\n    @staticmethod\n    def collate_fn(batch):\n        images, targets = list(zip(*batch))\n        batched_imgs = cat_list(images, fill_value=0)\n        batched_targets = cat_list(targets, fill_value=0)\n\n        return batched_imgs, batched_targets\n\n\ndef cat_list(images, fill_value=0):\n    max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))\n    batch_shape = (len(images),) + max_size\n    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)\n    for img, pad_img in zip(images, batched_imgs):\n        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)\n    return batched_imgs\n\n\nif __name__ == '__main__':\n    train_dataset = DUTSDataset(\"./\", train=True)\n    print(len(train_dataset))\n\n    val_dataset = DUTSDataset(\"./\", train=False)\n    print(len(val_dataset))\n\n    i, t = train_dataset[0]\n"
  },
  {
    "path": "pytorch_segmentation/u2net/predict.py",
    "content": "import os\nimport time\n\nimport cv2\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport torch\nfrom torchvision.transforms import transforms\n\nfrom src import u2net_full\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    weights_path = \"./u2net_full.pth\"\n    img_path = \"./test.png\"\n    threshold = 0.5\n\n    assert os.path.exists(img_path), f\"image file {img_path} dose not exists.\"\n\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n    data_transform = transforms.Compose([\n        transforms.ToTensor(),\n        transforms.Resize(320),\n        transforms.Normalize(mean=(0.485, 0.456, 0.406),\n                             std=(0.229, 0.224, 0.225))\n    ])\n\n    origin_img = cv2.cvtColor(cv2.imread(img_path, flags=cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)\n\n    h, w = origin_img.shape[:2]\n    img = data_transform(origin_img)\n    img = torch.unsqueeze(img, 0).to(device)  # [C, H, W] -> [1, C, H, W]\n\n    model = u2net_full()\n    weights = torch.load(weights_path, map_location='cpu')\n    if \"model\" in weights:\n        model.load_state_dict(weights[\"model\"])\n    else:\n        model.load_state_dict(weights)\n    model.to(device)\n    model.eval()\n\n    with torch.no_grad():\n        # init model\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        pred = model(img)\n        t_end = time_synchronized()\n        print(\"inference time: {}\".format(t_end - t_start))\n        pred = torch.squeeze(pred).to(\"cpu\").numpy()  # [1, 1, H, W] -> [H, W]\n\n        pred = cv2.resize(pred, dsize=(w, h), interpolation=cv2.INTER_LINEAR)\n        pred_mask = np.where(pred > threshold, 1, 0)\n        origin_img = np.array(origin_img, dtype=np.uint8)\n        seg_img 
= origin_img * pred_mask[..., None]\n        plt.imshow(seg_img)\n        plt.show()\n        cv2.imwrite(\"pred_result.png\", cv2.cvtColor(seg_img.astype(np.uint8), cv2.COLOR_RGB2BGR))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/u2net/requirements.txt",
    "content": "numpy==1.22.0\ntorch==1.13.1\ntorchvision==0.14.1\nopencv_python==4.5.4.60\n"
  },
  {
    "path": "pytorch_segmentation/u2net/results20220723-123632.txt",
    "content": "[epoch: 0] train_loss: 2.7385 lr: 0.002002 MAE: 0.465 maxF1: 0.464 \n[epoch: 10] train_loss: 1.0385 lr: 0.003994 MAE: 0.124 maxF1: 0.719 \n[epoch: 20] train_loss: 0.7629 lr: 0.003972 MAE: 0.077 maxF1: 0.787 \n[epoch: 30] train_loss: 0.6758 lr: 0.003936 MAE: 0.083 maxF1: 0.791 \n[epoch: 40] train_loss: 0.4905 lr: 0.003884 MAE: 0.073 maxF1: 0.805 \n[epoch: 50] train_loss: 0.4337 lr: 0.003818 MAE: 0.063 maxF1: 0.821 \n[epoch: 60] train_loss: 0.4157 lr: 0.003738 MAE: 0.067 maxF1: 0.818 \n[epoch: 70] train_loss: 0.3424 lr: 0.003644 MAE: 0.058 maxF1: 0.840 \n[epoch: 80] train_loss: 0.2909 lr: 0.003538 MAE: 0.057 maxF1: 0.842 \n[epoch: 90] train_loss: 0.3220 lr: 0.003420 MAE: 0.064 maxF1: 0.837 \n[epoch: 100] train_loss: 0.2653 lr: 0.003292 MAE: 0.055 maxF1: 0.847 \n[epoch: 110] train_loss: 0.2627 lr: 0.003153 MAE: 0.055 maxF1: 0.846 \n[epoch: 120] train_loss: 0.3230 lr: 0.003005 MAE: 0.058 maxF1: 0.837 \n[epoch: 130] train_loss: 0.2177 lr: 0.002850 MAE: 0.053 maxF1: 0.852 \n[epoch: 140] train_loss: 0.2807 lr: 0.002688 MAE: 0.061 maxF1: 0.824 \n[epoch: 150] train_loss: 0.2091 lr: 0.002520 MAE: 0.057 maxF1: 0.846 \n[epoch: 160] train_loss: 0.1971 lr: 0.002349 MAE: 0.049 maxF1: 0.857 \n[epoch: 170] train_loss: 0.2157 lr: 0.002175 MAE: 0.050 maxF1: 0.851 \n[epoch: 180] train_loss: 0.1881 lr: 0.002000 MAE: 0.048 maxF1: 0.857 \n[epoch: 190] train_loss: 0.1855 lr: 0.001825 MAE: 0.047 maxF1: 0.860 \n[epoch: 200] train_loss: 0.1817 lr: 0.001651 MAE: 0.047 maxF1: 0.863 \n[epoch: 210] train_loss: 0.1740 lr: 0.001480 MAE: 0.048 maxF1: 0.858 \n[epoch: 220] train_loss: 0.1707 lr: 0.001312 MAE: 0.048 maxF1: 0.860 \n[epoch: 230] train_loss: 0.1653 lr: 0.001150 MAE: 0.048 maxF1: 0.859 \n[epoch: 240] train_loss: 0.1652 lr: 0.000995 MAE: 0.046 maxF1: 0.860 \n[epoch: 250] train_loss: 0.1631 lr: 0.000847 MAE: 0.048 maxF1: 0.857 \n[epoch: 260] train_loss: 0.1584 lr: 0.000708 MAE: 0.047 maxF1: 0.862 \n[epoch: 270] train_loss: 0.1590 lr: 0.000580 MAE: 0.047 maxF1: 0.860 
\n[epoch: 280] train_loss: 0.1521 lr: 0.000462 MAE: 0.047 maxF1: 0.861 \n[epoch: 290] train_loss: 0.1535 lr: 0.000356 MAE: 0.047 maxF1: 0.861 \n[epoch: 300] train_loss: 0.1520 lr: 0.000262 MAE: 0.047 maxF1: 0.860 \n[epoch: 310] train_loss: 0.1488 lr: 0.000182 MAE: 0.047 maxF1: 0.860 \n[epoch: 320] train_loss: 0.1493 lr: 0.000116 MAE: 0.047 maxF1: 0.859 \n[epoch: 330] train_loss: 0.1470 lr: 0.000064 MAE: 0.047 maxF1: 0.860 \n[epoch: 340] train_loss: 0.1493 lr: 0.000028 MAE: 0.047 maxF1: 0.859 \n[epoch: 350] train_loss: 0.1482 lr: 0.000006 MAE: 0.047 maxF1: 0.858 \n[epoch: 359] train_loss: 0.1518 lr: 0.000000 MAE: 0.047 maxF1: 0.859 \n"
  },
  {
    "path": "pytorch_segmentation/u2net/src/__init__.py",
    "content": "from .model import u2net_full, u2net_lite\n"
  },
  {
    "path": "pytorch_segmentation/u2net/src/model.py",
    "content": "from typing import Union, List\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass ConvBNReLU(nn.Module):\n    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1):\n        super().__init__()\n\n        padding = kernel_size // 2 if dilation == 1 else dilation\n        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding, dilation=dilation, bias=False)\n        self.bn = nn.BatchNorm2d(out_ch)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        return self.relu(self.bn(self.conv(x)))\n\n\nclass DownConvBNReLU(ConvBNReLU):\n    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True):\n        super().__init__(in_ch, out_ch, kernel_size, dilation)\n        self.down_flag = flag\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        if self.down_flag:\n            x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)\n\n        return self.relu(self.bn(self.conv(x)))\n\n\nclass UpConvBNReLU(ConvBNReLU):\n    def __init__(self, in_ch: int, out_ch: int, kernel_size: int = 3, dilation: int = 1, flag: bool = True):\n        super().__init__(in_ch, out_ch, kernel_size, dilation)\n        self.up_flag = flag\n\n    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:\n        if self.up_flag:\n            x1 = F.interpolate(x1, size=x2.shape[2:], mode='bilinear', align_corners=False)\n        return self.relu(self.bn(self.conv(torch.cat([x1, x2], dim=1))))\n\n\nclass RSU(nn.Module):\n    def __init__(self, height: int, in_ch: int, mid_ch: int, out_ch: int):\n        super().__init__()\n\n        assert height >= 2\n        self.conv_in = ConvBNReLU(in_ch, out_ch)\n\n        encode_list = [DownConvBNReLU(out_ch, mid_ch, flag=False)]\n        decode_list = [UpConvBNReLU(mid_ch * 2, mid_ch, flag=False)]\n        for i in range(height - 2):\n     
       encode_list.append(DownConvBNReLU(mid_ch, mid_ch))\n            decode_list.append(UpConvBNReLU(mid_ch * 2, mid_ch if i < height - 3 else out_ch))\n\n        encode_list.append(ConvBNReLU(mid_ch, mid_ch, dilation=2))\n        self.encode_modules = nn.ModuleList(encode_list)\n        self.decode_modules = nn.ModuleList(decode_list)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        x_in = self.conv_in(x)\n\n        x = x_in\n        encode_outputs = []\n        for m in self.encode_modules:\n            x = m(x)\n            encode_outputs.append(x)\n\n        x = encode_outputs.pop()\n        for m in self.decode_modules:\n            x2 = encode_outputs.pop()\n            x = m(x, x2)\n\n        return x + x_in\n\n\nclass RSU4F(nn.Module):\n    def __init__(self, in_ch: int, mid_ch: int, out_ch: int):\n        super().__init__()\n        self.conv_in = ConvBNReLU(in_ch, out_ch)\n        self.encode_modules = nn.ModuleList([ConvBNReLU(out_ch, mid_ch),\n                                             ConvBNReLU(mid_ch, mid_ch, dilation=2),\n                                             ConvBNReLU(mid_ch, mid_ch, dilation=4),\n                                             ConvBNReLU(mid_ch, mid_ch, dilation=8)])\n\n        self.decode_modules = nn.ModuleList([ConvBNReLU(mid_ch * 2, mid_ch, dilation=4),\n                                             ConvBNReLU(mid_ch * 2, mid_ch, dilation=2),\n                                             ConvBNReLU(mid_ch * 2, out_ch)])\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        x_in = self.conv_in(x)\n\n        x = x_in\n        encode_outputs = []\n        for m in self.encode_modules:\n            x = m(x)\n            encode_outputs.append(x)\n\n        x = encode_outputs.pop()\n        for m in self.decode_modules:\n            x2 = encode_outputs.pop()\n            x = m(torch.cat([x, x2], dim=1))\n\n        return x + x_in\n\n\nclass U2Net(nn.Module):\n    def __init__(self, cfg: dict, 
out_ch: int = 1):\n        super().__init__()\n        assert \"encode\" in cfg\n        assert \"decode\" in cfg\n        self.encode_num = len(cfg[\"encode\"])\n\n        encode_list = []\n        side_list = []\n        for c in cfg[\"encode\"]:\n            # c: [height, in_ch, mid_ch, out_ch, RSU4F, side]\n            assert len(c) == 6\n            encode_list.append(RSU(*c[:4]) if c[4] is False else RSU4F(*c[1:4]))\n\n            if c[5] is True:\n                side_list.append(nn.Conv2d(c[3], out_ch, kernel_size=3, padding=1))\n        self.encode_modules = nn.ModuleList(encode_list)\n\n        decode_list = []\n        for c in cfg[\"decode\"]:\n            # c: [height, in_ch, mid_ch, out_ch, RSU4F, side]\n            assert len(c) == 6\n            decode_list.append(RSU(*c[:4]) if c[4] is False else RSU4F(*c[1:4]))\n\n            if c[5] is True:\n                side_list.append(nn.Conv2d(c[3], out_ch, kernel_size=3, padding=1))\n        self.decode_modules = nn.ModuleList(decode_list)\n        self.side_modules = nn.ModuleList(side_list)\n        self.out_conv = nn.Conv2d(self.encode_num * out_ch, out_ch, kernel_size=1)\n\n    def forward(self, x: torch.Tensor) -> Union[torch.Tensor, List[torch.Tensor]]:\n        _, _, h, w = x.shape\n\n        # collect encode outputs\n        encode_outputs = []\n        for i, m in enumerate(self.encode_modules):\n            x = m(x)\n            encode_outputs.append(x)\n            if i != self.encode_num - 1:\n                x = F.max_pool2d(x, kernel_size=2, stride=2, ceil_mode=True)\n\n        # collect decode outputs\n        x = encode_outputs.pop()\n        decode_outputs = [x]\n        for m in self.decode_modules:\n            x2 = encode_outputs.pop()\n            x = F.interpolate(x, size=x2.shape[2:], mode='bilinear', align_corners=False)\n            x = m(torch.concat([x, x2], dim=1))\n            decode_outputs.insert(0, x)\n\n        # collect side outputs\n        side_outputs = []\n        
for m in self.side_modules:\n            x = decode_outputs.pop()\n            x = F.interpolate(m(x), size=[h, w], mode='bilinear', align_corners=False)\n            side_outputs.insert(0, x)\n\n        x = self.out_conv(torch.concat(side_outputs, dim=1))\n\n        if self.training:\n            # do not use torch.sigmoid for amp safe\n            return [x] + side_outputs\n        else:\n            return torch.sigmoid(x)\n\n\ndef u2net_full(out_ch: int = 1):\n    cfg = {\n        # height, in_ch, mid_ch, out_ch, RSU4F, side\n        \"encode\": [[7, 3, 32, 64, False, False],      # En1\n                   [6, 64, 32, 128, False, False],    # En2\n                   [5, 128, 64, 256, False, False],   # En3\n                   [4, 256, 128, 512, False, False],  # En4\n                   [4, 512, 256, 512, True, False],   # En5\n                   [4, 512, 256, 512, True, True]],   # En6\n        # height, in_ch, mid_ch, out_ch, RSU4F, side\n        \"decode\": [[4, 1024, 256, 512, True, True],   # De5\n                   [4, 1024, 128, 256, False, True],  # De4\n                   [5, 512, 64, 128, False, True],    # De3\n                   [6, 256, 32, 64, False, True],     # De2\n                   [7, 128, 16, 64, False, True]]     # De1\n    }\n\n    return U2Net(cfg, out_ch)\n\n\ndef u2net_lite(out_ch: int = 1):\n    cfg = {\n        # height, in_ch, mid_ch, out_ch, RSU4F, side\n        \"encode\": [[7, 3, 16, 64, False, False],  # En1\n                   [6, 64, 16, 64, False, False],  # En2\n                   [5, 64, 16, 64, False, False],  # En3\n                   [4, 64, 16, 64, False, False],  # En4\n                   [4, 64, 16, 64, True, False],  # En5\n                   [4, 64, 16, 64, True, True]],  # En6\n        # height, in_ch, mid_ch, out_ch, RSU4F, side\n        \"decode\": [[4, 128, 16, 64, True, True],  # De5\n                   [4, 128, 16, 64, False, True],  # De4\n                   [5, 128, 16, 64, False, True],  # De3\n              
     [6, 128, 16, 64, False, True],  # De2\n                   [7, 128, 16, 64, False, True]]  # De1\n    }\n\n    return U2Net(cfg, out_ch)\n\n\ndef convert_onnx(m, save_path):\n    m.eval()\n    x = torch.rand(1, 3, 288, 288, requires_grad=True)\n\n    # export the model\n    torch.onnx.export(m,  # model being run\n                      x,  # model input (or a tuple for multiple inputs)\n                      save_path,  # where to save the model (can be a file or file-like object)\n                      export_params=True,\n                      opset_version=11)\n\n\nif __name__ == '__main__':\n    # n_m = RSU(height=7, in_ch=3, mid_ch=12, out_ch=3)\n    # convert_onnx(n_m, \"RSU7.onnx\")\n    #\n    # n_m = RSU4F(in_ch=3, mid_ch=12, out_ch=3)\n    # convert_onnx(n_m, \"RSU4F.onnx\")\n\n    u2net = u2net_full()\n    convert_onnx(u2net, \"u2net_full.onnx\")\n"
  },
  {
    "path": "pytorch_segmentation/u2net/train.py",
    "content": "import os\nimport time\nimport datetime\nfrom typing import Union, List\n\nimport torch\nfrom torch.utils import data\n\nfrom src import u2net_full\nfrom train_utils import train_one_epoch, evaluate, get_params_groups, create_lr_scheduler\nfrom my_dataset import DUTSDataset\nimport transforms as T\n\n\nclass SODPresetTrain:\n    def __init__(self, base_size: Union[int, List[int]], crop_size: int,\n                 hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Resize(base_size, resize_mask=True),\n            T.RandomCrop(crop_size),\n            T.RandomHorizontalFlip(hflip_prob),\n            T.Normalize(mean=mean, std=std)\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SODPresetEval:\n    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Resize(base_size, resize_mask=False),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    batch_size = args.batch_size\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))\n    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))\n\n    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])\n    train_data_loader = data.DataLoader(train_dataset,\n                                        batch_size=batch_size,\n                                        
num_workers=num_workers,\n                                        shuffle=True,\n                                        pin_memory=True,\n                                        collate_fn=train_dataset.collate_fn)\n\n    val_data_loader = data.DataLoader(val_dataset,\n                                      batch_size=1,  # must be 1\n                                      num_workers=num_workers,\n                                      pin_memory=True,\n                                      collate_fn=val_dataset.collate_fn)\n\n    model = u2net_full()\n    model.to(device)\n\n    params_group = get_params_groups(model, weight_decay=args.weight_decay)\n    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,\n                                       warmup=True, warmup_epochs=2)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    if args.resume:\n        checkpoint = torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    current_mae, current_f1 = 1.0, 0.0\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        save_file = {\"model\": model.state_dict(),\n                     \"optimizer\": optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     \"epoch\": epoch,\n                     \"args\": args}\n        if args.amp:\n       
     save_file[\"scaler\"] = scaler.state_dict()\n\n        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:\n            # 每间隔eval_interval个epoch验证一次，减少验证频率节省训练时间\n            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)\n            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()\n            print(f\"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}\")\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                write_info = f\"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} \" \\\n                             f\"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \\n\"\n                f.write(write_info)\n\n            # save_best\n            if current_mae >= mae_info and current_f1 <= f1_info:\n                torch.save(save_file, \"save_weights/model_best.pth\")\n\n        # only save latest 10 epoch weights\n        if os.path.exists(f\"save_weights/model_{epoch-10}.pth\"):\n            os.remove(f\"save_weights/model_{epoch-10}.pth\")\n\n        torch.save(save_file, f\"save_weights/model_{epoch}.pth\")\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print(\"training time {}\".format(total_time_str))\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch u2net training\")\n\n    parser.add_argument(\"--data-path\", default=\"./\", help=\"DUTS root\")\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument(\"-b\", \"--batch-size\", default=16, type=int)\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    parser.add_argument(\"--epochs\", default=360, type=int, metavar=\"N\",\n   
                     help=\"number of total epochs to train\")\n    parser.add_argument(\"--eval-interval\", default=10, type=int, help=\"validation interval default 10 Epochs\")\n\n    parser.add_argument('--lr', default=0.001, type=float, help='initial learning rate')\n    parser.add_argument('--print-freq', default=50, type=int, help='print frequency')\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n                        help='start epoch')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", action='store_true',\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/u2net/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\nfrom typing import Union, List\n\nimport torch\nfrom torch.utils import data\n\nfrom src import u2net_full\nfrom train_utils import (train_one_epoch, evaluate, init_distributed_mode, save_on_master, mkdir,\n                         create_lr_scheduler, get_params_groups)\nfrom my_dataset import DUTSDataset\nimport transforms as T\n\n\nclass SODPresetTrain:\n    def __init__(self, base_size: Union[int, List[int]], crop_size: int,\n                 hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Resize(base_size, resize_mask=True),\n            T.RandomCrop(crop_size),\n            T.RandomHorizontalFlip(hflip_prob),\n            T.Normalize(mean=mean, std=std)\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SODPresetEval:\n    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Resize(base_size, resize_mask=False),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    train_dataset = DUTSDataset(args.data_path, train=True, transforms=SODPresetTrain([320, 320], crop_size=288))\n    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = data.distributed.DistributedSampler(train_dataset)\n        test_sampler = data.distributed.DistributedSampler(val_dataset)\n    
else:\n        train_sampler = data.RandomSampler(train_dataset)\n        test_sampler = data.SequentialSampler(val_dataset)\n\n    train_data_loader = data.DataLoader(\n        train_dataset, batch_size=args.batch_size,\n        sampler=train_sampler, num_workers=args.workers,\n        pin_memory=True, collate_fn=train_dataset.collate_fn, drop_last=True)\n\n    val_data_loader = data.DataLoader(\n        val_dataset, batch_size=1,  # batch_size must be 1\n        sampler=test_sampler, num_workers=args.workers,\n        pin_memory=True, collate_fn=train_dataset.collate_fn)\n\n    # create model num_classes equal background + 20 classes\n    model = u2net_full()\n    model.to(device)\n\n    if args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params_group = get_params_groups(model, weight_decay=args.weight_decay)\n    optimizer = torch.optim.AdamW(params_group, lr=args.lr, weight_decay=args.weight_decay)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs,\n                                       warmup=True, warmup_epochs=2)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = 
checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)\n        print(mae_metric, f1_metric)\n        return\n\n    print(\"Start training\")\n    current_mae, current_f1 = 1.0, 0.0\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        save_file = {'model': model_without_ddp.state_dict(),\n                     'optimizer': optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     'args': args,\n                     'epoch': epoch}\n        if args.amp:\n            save_file[\"scaler\"] = scaler.state_dict()\n\n        if epoch % args.eval_interval == 0 or epoch == args.epochs - 1:\n            # 每间隔eval_interval个epoch验证一次，减少验证频率节省训练时间\n            mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)\n            mae_info, f1_info = mae_metric.compute(), f1_metric.compute()\n            print(f\"[epoch: {epoch}] val_MAE: {mae_info:.3f} val_maxF1: {f1_info:.3f}\")\n\n            # 只在主进程上进行写操作\n            if args.rank in [-1, 0]:\n                # write into txt\n                with open(results_file, \"a\") as f:\n                    # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                    write_info = f\"[epoch: {epoch}] train_loss: {mean_loss:.4f} lr: {lr:.6f} \" \\\n                                 f\"MAE: {mae_info:.3f} maxF1: {f1_info:.3f} \\n\"\n                    f.write(write_info)\n\n                # save_best\n                if current_mae >= mae_info and current_f1 <= f1_info:\n                    if 
args.output_dir:\n                        # 只在主节点上执行保存权重操作\n                        save_on_master(save_file,\n                                       os.path.join(args.output_dir, 'model_best.pth'))\n\n        if args.output_dir:\n            if args.rank in [-1, 0]:\n                # only save latest 10 epoch weights\n                if os.path.exists(os.path.join(args.output_dir, f'model_{epoch - 10}.pth')):\n                    os.remove(os.path.join(args.output_dir, f'model_{epoch - 10}.pth'))\n\n            # 只在主节点上执行保存权重操作\n            save_on_master(save_file,\n                           os.path.join(args.output_dir, f'model_{epoch}.pth'))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(VOCdevkit)\n    parser.add_argument('--data-path', default='./', help='DUTS root')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=16, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start-epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=360, type=int, metavar='N',\n                        help='number of total epochs to run')\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 是否使用同步BN(在多个GPU之间同步)，默认不开启，开启后训练速度会变慢\n    parser.add_argument('--sync-bn', action='store_true', help='whether using SyncBatchNorm')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, 
type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 训练学习率\n    parser.add_argument('--lr', default=0.001, type=float,\n                        help='initial learning rate')\n    # 验证频率\n    parser.add_argument(\"--eval-interval\", default=10, type=int, help=\"validation interval default 10 Epochs\")\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=20, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n    # 分布式进程数\n    parser.add_argument('--world-size', default=1, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", action='store_true',\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/u2net/train_utils/__init__.py",
    "content": "from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler, get_params_groups\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\n"
  },
  {
    "path": "pytorch_segmentation/u2net/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport time\nimport torch\nimport torch.distributed as dist\nimport torch.nn.functional as F\n\nimport errno\nimport os\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\ndef all_gather(data):\n    \"\"\"\n    收集各个进程中的数据\n    Run all_gather on arbitrary picklable data (not necessarily tensors)\n    
Args:\n        data: any picklable object\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    world_size = get_world_size()  # 进程数\n    if world_size == 1:\n        return [data]\n\n    data_list = [None] * world_size\n    dist.all_gather_object(data_list, data)\n\n    return data_list\n\n\nclass MeanAbsoluteError(object):\n    def __init__(self):\n        self.mae_list = []\n\n    def update(self, pred: torch.Tensor, gt: torch.Tensor):\n        batch_size, c, h, w = gt.shape\n        assert batch_size == 1, f\"validation mode batch_size must be 1, but got batch_size: {batch_size}.\"\n        resize_pred = F.interpolate(pred, (h, w), mode=\"bilinear\", align_corners=False)\n        error_pixels = torch.sum(torch.abs(resize_pred - gt), dim=(1, 2, 3)) / (h * w)\n        self.mae_list.extend(error_pixels.tolist())\n\n    def compute(self):\n        mae = sum(self.mae_list) / len(self.mae_list)\n        return mae\n\n    def gather_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        gather_mae_list = []\n        for i in all_gather(self.mae_list):\n            gather_mae_list.extend(i)\n        self.mae_list = gather_mae_list\n\n    def __str__(self):\n        mae = self.compute()\n        return f'MAE: {mae:.3f}'\n\n\nclass F1Score(object):\n    \"\"\"\n    refer: https://github.com/xuebinqin/DIS/blob/main/IS-Net/basics.py\n    \"\"\"\n\n    def __init__(self, threshold: float = 0.5):\n        self.precision_cum = None\n        self.recall_cum = None\n        self.num_cum = None\n        self.threshold = threshold\n\n    def update(self, pred: torch.Tensor, gt: torch.Tensor):\n        batch_size, c, h, w = gt.shape\n        assert batch_size == 1, f\"validation mode batch_size must be 1, but got batch_size: {batch_size}.\"\n        resize_pred = F.interpolate(pred, (h, w), 
mode=\"bilinear\", align_corners=False)\n        gt_num = torch.sum(torch.gt(gt, self.threshold).float())\n\n        pp = resize_pred[torch.gt(gt, self.threshold)]  # 对应预测map中GT为前景的区域\n        nn = resize_pred[torch.le(gt, self.threshold)]  # 对应预测map中GT为背景的区域\n\n        pp_hist = torch.histc(pp, bins=255, min=0.0, max=1.0)\n        nn_hist = torch.histc(nn, bins=255, min=0.0, max=1.0)\n\n        # Sort according to the prediction probability from large to small\n        pp_hist_flip = torch.flipud(pp_hist)\n        nn_hist_flip = torch.flipud(nn_hist)\n\n        pp_hist_flip_cum = torch.cumsum(pp_hist_flip, dim=0)\n        nn_hist_flip_cum = torch.cumsum(nn_hist_flip, dim=0)\n\n        precision = pp_hist_flip_cum / (pp_hist_flip_cum + nn_hist_flip_cum + 1e-4)\n        recall = pp_hist_flip_cum / (gt_num + 1e-4)\n\n        if self.precision_cum is None:\n            self.precision_cum = torch.full_like(precision, fill_value=0.)\n\n        if self.recall_cum is None:\n            self.recall_cum = torch.full_like(recall, fill_value=0.)\n\n        if self.num_cum is None:\n            self.num_cum = torch.zeros([1], dtype=gt.dtype, device=gt.device)\n\n        self.precision_cum += precision\n        self.recall_cum += recall\n        self.num_cum += batch_size\n\n    def compute(self):\n        pre_mean = self.precision_cum / self.num_cum\n        rec_mean = self.recall_cum / self.num_cum\n        f1_mean = (1 + 0.3) * pre_mean * rec_mean / (0.3 * pre_mean + rec_mean + 1e-8)\n        max_f1 = torch.amax(f1_mean).item()\n        return max_f1\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.precision_cum)\n        torch.distributed.all_reduce(self.recall_cum)\n        torch.distributed.all_reduce(self.num_cum)\n\n    def __str__(self):\n        max_f1 = 
self.compute()\n        return f'maxF1: {max_f1:.3f}'\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n    def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = ''\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}',\n                'max mem: {memory:.0f}'\n            ])\n        else:\n            log_msg = self.delimiter.join([\n                header,\n           
     '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}'\n            ])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0:\n                eta_seconds = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))\n                if torch.cuda.is_available():\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time),\n                        memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {}'.format(header, total_time_str))\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    if not dist.is_available():\n        return False\n    if not 
dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n        args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    elif hasattr(args, \"rank\"):\n        pass\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_segmentation/u2net/train_utils/train_and_eval.py",
    "content": "import math\nimport torch\nfrom torch.nn import functional as F\nimport train_utils.distributed_utils as utils\n\n\ndef criterion(inputs, target):\n    losses = [F.binary_cross_entropy_with_logits(inputs[i], target) for i in range(len(inputs))]\n    total_loss = sum(losses)\n\n    return total_loss\n\n\ndef evaluate(model, data_loader, device):\n    model.eval()\n    mae_metric = utils.MeanAbsoluteError()\n    f1_metric = utils.F1Score()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = 'Test:'\n    with torch.no_grad():\n        for images, targets in metric_logger.log_every(data_loader, 100, header):\n            images, targets = images.to(device), targets.to(device)\n            output = model(images)\n\n            # post norm\n            # ma = torch.max(output)\n            # mi = torch.min(output)\n            # output = (output - mi) / (ma - mi)\n\n            mae_metric.update(output, targets)\n            f1_metric.update(output, targets)\n\n        mae_metric.gather_from_all_processes()\n        f1_metric.reduce_from_all_processes()\n\n    return mae_metric, f1_metric\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, lr_scheduler, print_freq=10, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    for image, target in metric_logger.log_every(data_loader, print_freq, header):\n        image, target = image.to(device), target.to(device)\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            output = model(image)\n            loss = criterion(output, target)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(loss).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            loss.backward()\n            
optimizer.step()\n\n        lr_scheduler.step()\n\n        lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(loss=loss.item(), lr=lr)\n\n    return metric_logger.meters[\"loss\"].global_avg, lr\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3,\n                        end_factor=1e-6):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            current_step = (x - warmup_epochs * num_step)\n            cosine_steps = (epochs - warmup_epochs) * num_step\n            # warmup后lr倍率因子从1 -> end_factor\n            return ((1 + math.cos(current_step * math.pi / cosine_steps)) / 2) * (1 - end_factor) + end_factor\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n\n\ndef get_params_groups(model: torch.nn.Module, weight_decay: float = 1e-4):\n    params_group = [{\"params\": [], \"weight_decay\": 0.},  # no decay\n                    {\"params\": [], \"weight_decay\": weight_decay}]  # with decay\n\n    for name, param in model.named_parameters():\n        if not param.requires_grad:\n            continue  # frozen weights\n\n        if len(param.shape) == 1 or name.endswith(\".bias\"):\n            # bn:(weight,bias)  conv2d:(bias)  linear:(bias)\n            params_group[0][\"params\"].append(param)  # no decay\n        else:\n            params_group[1][\"params\"].append(param)  # with decay\n\n    return params_group\n"
  },
  {
    "path": "pytorch_segmentation/u2net/transforms.py",
    "content": "import random\nfrom typing import List, Union\nfrom torchvision.transforms import functional as F\nfrom torchvision.transforms import transforms as T\n\n\nclass Compose(object):\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target=None):\n        for t in self.transforms:\n            image, target = t(image, target)\n\n        return image, target\n\n\nclass ToTensor(object):\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        target = F.to_tensor(target)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    def __init__(self, prob):\n        self.flip_prob = prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.hflip(image)\n            target = F.hflip(target)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean, std):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n\n\nclass Resize(object):\n    def __init__(self, size: Union[int, List[int]], resize_mask: bool = True):\n        self.size = size  # [h, w]\n        self.resize_mask = resize_mask\n\n    def __call__(self, image, target=None):\n        image = F.resize(image, self.size)\n        if self.resize_mask is True:\n            target = F.resize(target, self.size)\n\n        return image, target\n\n\nclass RandomCrop(object):\n    def __init__(self, size: int):\n        self.size = size\n\n    def pad_if_smaller(self, img, fill=0):\n        # 如果图像最小边长小于给定size，则用数值fill进行padding\n        min_size = min(img.shape[-2:])\n        if min_size < self.size:\n            ow, oh = img.size\n            padh = self.size - oh if oh < self.size else 0\n            padw = self.size - ow if ow < self.size else 0\n            img = F.pad(img, [0, 0, padw, 
padh], fill=fill)\n        return img\n\n    def __call__(self, image, target):\n        image = self.pad_if_smaller(image)\n        target = self.pad_if_smaller(target)\n        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))\n        image = F.crop(image, *crop_params)\n        target = F.crop(target, *crop_params)\n        return image, target\n"
  },
  {
    "path": "pytorch_segmentation/u2net/validation.py",
    "content": "import os\nfrom typing import Union, List\n\nimport torch\nfrom torch.utils import data\n\nfrom src import u2net_full\nfrom train_utils import evaluate\nfrom my_dataset import DUTSDataset\nimport transforms as T\n\n\nclass SODPresetEval:\n    def __init__(self, base_size: Union[int, List[int]], mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Resize(base_size, resize_mask=False),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    assert os.path.exists(args.weights), f\"weights {args.weights} not found.\"\n\n    val_dataset = DUTSDataset(args.data_path, train=False, transforms=SODPresetEval([320, 320]))\n\n    num_workers = 4\n    val_data_loader = data.DataLoader(val_dataset,\n                                      batch_size=1,  # must be 1\n                                      num_workers=num_workers,\n                                      pin_memory=True,\n                                      shuffle=False,\n                                      collate_fn=val_dataset.collate_fn)\n\n    model = u2net_full()\n    pretrain_weights = torch.load(args.weights, map_location='cpu')\n    if \"model\" in pretrain_weights:\n        model.load_state_dict(pretrain_weights[\"model\"])\n    else:\n        model.load_state_dict(pretrain_weights)\n    model.to(device)\n\n    mae_metric, f1_metric = evaluate(model, val_data_loader, device=device)\n    print(mae_metric, f1_metric)\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch u2net validation\")\n\n    parser.add_argument(\"--data-path\", default=\"./\", help=\"DUTS root\")\n    parser.add_argument(\"--weights\", default=\"./u2net_full.pth\")\n    
parser.add_argument(\"--device\", default=\"cuda:0\", help=\"training device\")\n    parser.add_argument('--print-freq', default=10, type=int, help='print frequency')\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/unet/README.md",
    "content": "# U-Net(Convolutional Networks for Biomedical Image Segmentation)\n\n## 该项目主要参考以下开源仓库\n* [https://github.com/milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet)\n* [https://github.com/pytorch/vision](https://github.com/pytorch/vision)\n\n## 环境配置：\n* Python3.6/3.7/3.8\n* Pytorch1.10\n* Ubuntu或Centos(Windows暂不支持多GPU训练)\n* 最好使用GPU训练\n* 详细环境配置见`requirements.txt`\n\n## 文件结构：\n```\n  ├── src: 搭建U-Net模型代码\n  ├── train_utils: 训练、验证以及多GPU训练相关模块\n  ├── my_dataset.py: 自定义dataset用于读取DRIVE数据集(视网膜血管分割)\n  ├── train.py: 以单GPU为例进行训练\n  ├── train_multi_GPU.py: 针对使用多GPU的用户使用\n  ├── predict.py: 简易的预测脚本，使用训练好的权重进行预测测试\n  └── compute_mean_std.py: 统计数据集各通道的均值和标准差\n```\n\n## DRIVE数据集下载地址：\n* 官网地址： [https://drive.grand-challenge.org/](https://drive.grand-challenge.org/)\n* 百度云链接： [https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw](https://pan.baidu.com/s/1Tjkrx2B9FgoJk0KviA-rDw)  密码: 8no8\n\n\n## 训练方法\n* 确保提前准备好数据集\n* 若要使用单GPU或者CPU训练，直接使用train.py训练脚本\n* 若要使用多GPU训练，使用`torchrun --nproc_per_node=8 train_multi_GPU.py`指令，`nproc_per_node`参数为使用GPU数量\n* 如果想指定使用哪些GPU设备可在指令前加上`CUDA_VISIBLE_DEVICES=0,3`(例如我只要使用设备中的第1块和第4块GPU设备)\n* `CUDA_VISIBLE_DEVICES=0,3 torchrun --nproc_per_node=2 train_multi_GPU.py`\n\n## 注意事项\n* 在使用训练脚本时，注意要将`--data-path`设置为自己存放`DRIVE`文件夹所在的**根目录**\n* 在使用预测脚本时，要将`weights_path`设置为你自己生成的权重路径。\n* 使用validation文件时，请确保你的验证集或者测试集中包含每个类别的目标，并且使用时只需要修改`--num-classes`、`--data-path`和`--weights`即可，其他代码尽量不要改动\n\n## 使用U-Net在DRIVE数据集上训练得到的权重(仅供测试使用)\n- 链接: https://pan.baidu.com/s/1BOqkEpgt1XRqziyc941Hcw  密码: p50a\n\n## 如果对U-Net网络不了解的可参考我的bilibili\n* [https://www.bilibili.com/video/BV1Vq4y127fB/](https://www.bilibili.com/video/BV1Vq4y127fB/)\n\n\n## 进一步了解该项目，以及对U-Net代码的分析可参考我的bilibili\n* [https://b23.tv/PCJJmqN](https://b23.tv/PCJJmqN)\n\n## 本项目U-Net默认使用双线性插值作为上采样，结构图如下\n![u-net](unet.png)\n"
  },
  {
    "path": "pytorch_segmentation/unet/compute_mean_std.py",
    "content": "import os\nfrom PIL import Image\nimport numpy as np\n\n\ndef main():\n    img_channels = 3\n    img_dir = \"./DRIVE/training/images\"\n    roi_dir = \"./DRIVE/training/mask\"\n    assert os.path.exists(img_dir), f\"image dir: '{img_dir}' does not exist.\"\n    assert os.path.exists(roi_dir), f\"roi dir: '{roi_dir}' does not exist.\"\n\n    img_name_list = [i for i in os.listdir(img_dir) if i.endswith(\".tif\")]\n    cumulative_mean = np.zeros(img_channels)\n    cumulative_std = np.zeros(img_channels)\n    for img_name in img_name_list:\n        img_path = os.path.join(img_dir, img_name)\n        ori_path = os.path.join(roi_dir, img_name.replace(\".tif\", \"_mask.gif\"))\n        img = np.array(Image.open(img_path)) / 255.\n        roi_img = np.array(Image.open(ori_path).convert('L'))\n\n        img = img[roi_img == 255]\n        cumulative_mean += img.mean(axis=0)\n        cumulative_std += img.std(axis=0)\n\n    mean = cumulative_mean / len(img_name_list)\n    std = cumulative_std / len(img_name_list)\n    print(f\"mean: {mean}\")\n    print(f\"std: {std}\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/unet/my_dataset.py",
    "content": "import os\nfrom PIL import Image\nimport numpy as np\nfrom torch.utils.data import Dataset\n\n\nclass DriveDataset(Dataset):\n    def __init__(self, root: str, train: bool, transforms=None):\n        super(DriveDataset, self).__init__()\n        self.flag = \"training\" if train else \"test\"\n        data_root = os.path.join(root, \"DRIVE\", self.flag)\n        assert os.path.exists(data_root), f\"path '{data_root}' does not exists.\"\n        self.transforms = transforms\n        img_names = [i for i in os.listdir(os.path.join(data_root, \"images\")) if i.endswith(\".tif\")]\n        self.img_list = [os.path.join(data_root, \"images\", i) for i in img_names]\n        self.manual = [os.path.join(data_root, \"1st_manual\", i.split(\"_\")[0] + \"_manual1.gif\")\n                       for i in img_names]\n        # check files\n        for i in self.manual:\n            if os.path.exists(i) is False:\n                raise FileNotFoundError(f\"file {i} does not exists.\")\n\n        self.roi_mask = [os.path.join(data_root, \"mask\", i.split(\"_\")[0] + f\"_{self.flag}_mask.gif\")\n                         for i in img_names]\n        # check files\n        for i in self.roi_mask:\n            if os.path.exists(i) is False:\n                raise FileNotFoundError(f\"file {i} does not exists.\")\n\n    def __getitem__(self, idx):\n        img = Image.open(self.img_list[idx]).convert('RGB')\n        manual = Image.open(self.manual[idx]).convert('L')\n        manual = np.array(manual) / 255\n        roi_mask = Image.open(self.roi_mask[idx]).convert('L')\n        roi_mask = 255 - np.array(roi_mask)\n        mask = np.clip(manual + roi_mask, a_min=0, a_max=255)\n\n        # 这里转回PIL的原因是，transforms中是对PIL数据进行处理\n        mask = Image.fromarray(mask)\n\n        if self.transforms is not None:\n            img, mask = self.transforms(img, mask)\n\n        return img, mask\n\n    def __len__(self):\n        return len(self.img_list)\n\n    @staticmethod\n    def 
collate_fn(batch):\n        images, targets = list(zip(*batch))\n        batched_imgs = cat_list(images, fill_value=0)\n        batched_targets = cat_list(targets, fill_value=255)\n        return batched_imgs, batched_targets\n\n\ndef cat_list(images, fill_value=0):\n    max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))\n    batch_shape = (len(images),) + max_size\n    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)\n    for img, pad_img in zip(images, batched_imgs):\n        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)\n    return batched_imgs\n\n"
  },
  {
    "path": "pytorch_segmentation/unet/predict.py",
    "content": "import os\nimport time\n\nimport torch\nfrom torchvision import transforms\nimport numpy as np\nfrom PIL import Image\n\nfrom src import UNet\n\n\ndef time_synchronized():\n    torch.cuda.synchronize() if torch.cuda.is_available() else None\n    return time.time()\n\n\ndef main():\n    classes = 1  # exclude background\n    weights_path = \"./save_weights/best_model.pth\"\n    img_path = \"./DRIVE/test/images/01_test.tif\"\n    roi_mask_path = \"./DRIVE/test/mask/01_test_mask.gif\"\n    assert os.path.exists(weights_path), f\"weights {weights_path} not found.\"\n    assert os.path.exists(img_path), f\"image {img_path} not found.\"\n    assert os.path.exists(roi_mask_path), f\"image {roi_mask_path} not found.\"\n\n    mean = (0.709, 0.381, 0.224)\n    std = (0.127, 0.079, 0.043)\n\n    # get devices\n    device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    print(\"using {} device.\".format(device))\n\n    # create model\n    model = UNet(in_channels=3, num_classes=classes+1, base_c=32)\n\n    # load weights\n    model.load_state_dict(torch.load(weights_path, map_location='cpu')['model'])\n    model.to(device)\n\n    # load roi mask\n    roi_img = Image.open(roi_mask_path).convert('L')\n    roi_img = np.array(roi_img)\n\n    # load image\n    original_img = Image.open(img_path).convert('RGB')\n\n    # from pil image to tensor and normalize\n    data_transform = transforms.Compose([transforms.ToTensor(),\n                                         transforms.Normalize(mean=mean, std=std)])\n    img = data_transform(original_img)\n    # expand batch dimension\n    img = torch.unsqueeze(img, dim=0)\n\n    model.eval()  # 进入验证模式\n    with torch.no_grad():\n        # init model\n        img_height, img_width = img.shape[-2:]\n        init_img = torch.zeros((1, 3, img_height, img_width), device=device)\n        model(init_img)\n\n        t_start = time_synchronized()\n        output = model(img.to(device))\n        t_end = 
time_synchronized()\n        print(\"inference time: {}\".format(t_end - t_start))\n\n        prediction = output['out'].argmax(1).squeeze(0)\n        prediction = prediction.to(\"cpu\").numpy().astype(np.uint8)\n        # 将前景对应的像素值改成255(白色)\n        prediction[prediction == 1] = 255\n        # 将不感兴趣的区域像素设置成0(黑色)\n        prediction[roi_img == 0] = 0\n        mask = Image.fromarray(prediction)\n        mask.save(\"test_result.png\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "pytorch_segmentation/unet/requirements.txt",
    "content": "numpy==1.22.0\ntorch==1.13.1\ntorchvision==0.14.1\nPillow\n"
  },
  {
    "path": "pytorch_segmentation/unet/results20220109-165837.txt",
    "content": "[epoch: 0]\ntrain_loss: 1.3541\nlr: 0.010000\ndice coefficient: 0.111\nglobal correct: 71.1\naverage row correct: ['78.2', '22.4']\nIoU: ['70.2', '9.0']\nmean IoU: 39.6\n\n[epoch: 1]\ntrain_loss: 1.0442\nlr: 0.009955\ndice coefficient: 0.014\nglobal correct: 85.7\naverage row correct: ['98.0', '1.0']\nIoU: ['85.7', '0.9']\nmean IoU: 43.3\n\n[epoch: 2]\ntrain_loss: 0.9315\nlr: 0.009910\ndice coefficient: 0.000\nglobal correct: 87.2\naverage row correct: ['99.9', '0.0']\nIoU: ['87.2', '0.0']\nmean IoU: 43.6\n\n[epoch: 3]\ntrain_loss: 0.7929\nlr: 0.009864\ndice coefficient: 0.021\nglobal correct: 87.4\naverage row correct: ['100.0', '1.1']\nIoU: ['87.4', '1.1']\nmean IoU: 44.3\n\n[epoch: 4]\ntrain_loss: 0.7329\nlr: 0.009819\ndice coefficient: 0.210\nglobal correct: 88.8\naverage row correct: ['99.9', '12.5']\nIoU: ['88.6', '12.4']\nmean IoU: 50.5\n\n[epoch: 5]\ntrain_loss: 0.6343\nlr: 0.009774\ndice coefficient: 0.440\nglobal correct: 90.5\naverage row correct: ['99.3', '30.8']\nIoU: ['90.2', '29.3']\nmean IoU: 59.7\n\n[epoch: 6]\ntrain_loss: 0.6105\nlr: 0.009728\ndice coefficient: 0.575\nglobal correct: 91.8\naverage row correct: ['98.6', '45.5']\nIoU: ['91.3', '41.5']\nmean IoU: 66.4\n\n[epoch: 7]\ntrain_loss: 0.5982\nlr: 0.009683\ndice coefficient: 0.651\nglobal correct: 91.3\naverage row correct: ['95.1', '64.8']\nIoU: ['90.5', '48.5']\nmean IoU: 69.5\n\n[epoch: 8]\ntrain_loss: 0.6641\nlr: 0.009637\ndice coefficient: 0.651\nglobal correct: 90.7\naverage row correct: ['94.0', '67.7']\nIoU: ['89.8', '48.0']\nmean IoU: 68.9\n\n[epoch: 9]\ntrain_loss: 0.5530\nlr: 0.009592\ndice coefficient: 0.550\nglobal correct: 80.7\naverage row correct: ['81.0', '78.3']\nIoU: ['78.5', '34.0']\nmean IoU: 56.3\n\n[epoch: 10]\ntrain_loss: 0.5676\nlr: 0.009547\ndice coefficient: 0.659\nglobal correct: 89.2\naverage row correct: ['91.0', '77.0']\nIoU: ['88.1', '47.6']\nmean IoU: 67.8\n\n[epoch: 11]\ntrain_loss: 0.5494\nlr: 0.009501\ndice coefficient: 0.654\nglobal 
correct: 90.0\naverage row correct: ['92.8', '70.5']\nIoU: ['89.0', '47.3']\nmean IoU: 68.1\n\n[epoch: 12]\ntrain_loss: 0.5293\nlr: 0.009456\ndice coefficient: 0.713\nglobal correct: 91.9\naverage row correct: ['94.6', '73.2']\nIoU: ['91.1', '53.5']\nmean IoU: 72.3\n\n[epoch: 13]\ntrain_loss: 0.5291\nlr: 0.009410\ndice coefficient: 0.689\nglobal correct: 91.2\naverage row correct: ['93.6', '75.1']\nIoU: ['90.3', '52.2']\nmean IoU: 71.3\n\n[epoch: 14]\ntrain_loss: 0.5163\nlr: 0.009365\ndice coefficient: 0.691\nglobal correct: 92.6\naverage row correct: ['96.3', '67.0']\nIoU: ['91.9', '53.4']\nmean IoU: 72.6\n\n[epoch: 15]\ntrain_loss: 0.5168\nlr: 0.009319\ndice coefficient: 0.722\nglobal correct: 93.3\naverage row correct: ['96.6', '70.3']\nIoU: ['92.6', '57.1']\nmean IoU: 74.9\n\n[epoch: 16]\ntrain_loss: 0.5153\nlr: 0.009273\ndice coefficient: 0.740\nglobal correct: 94.0\naverage row correct: ['97.7', '68.3']\nIoU: ['93.4', '59.1']\nmean IoU: 76.3\n\n[epoch: 17]\ntrain_loss: 0.4923\nlr: 0.009228\ndice coefficient: 0.734\nglobal correct: 93.6\naverage row correct: ['96.9', '70.7']\nIoU: ['92.9', '58.4']\nmean IoU: 75.7\n\n[epoch: 18]\ntrain_loss: 0.4692\nlr: 0.009182\ndice coefficient: 0.740\nglobal correct: 93.7\naverage row correct: ['97.0', '71.2']\nIoU: ['93.1', '59.2']\nmean IoU: 76.1\n\n[epoch: 19]\ntrain_loss: 0.4701\nlr: 0.009136\ndice coefficient: 0.754\nglobal correct: 94.0\naverage row correct: ['97.1', '72.9']\nIoU: ['93.4', '60.8']\nmean IoU: 77.1\n\n[epoch: 20]\ntrain_loss: 0.4710\nlr: 0.009091\ndice coefficient: 0.761\nglobal correct: 94.0\naverage row correct: ['96.8', '75.0']\nIoU: ['93.4', '61.5']\nmean IoU: 77.5\n\n[epoch: 21]\ntrain_loss: 0.4624\nlr: 0.009045\ndice coefficient: 0.756\nglobal correct: 94.1\naverage row correct: ['97.3', '72.3']\nIoU: ['93.6', '61.1']\nmean IoU: 77.3\n\n[epoch: 22]\ntrain_loss: 0.4480\nlr: 0.008999\ndice coefficient: 0.759\nglobal correct: 94.3\naverage row correct: ['97.5', '71.9']\nIoU: ['93.7', '61.5']\nmean 
IoU: 77.6\n\n[epoch: 23]\ntrain_loss: 0.4342\nlr: 0.008954\ndice coefficient: 0.748\nglobal correct: 94.3\naverage row correct: ['98.1', '68.2']\nIoU: ['93.7', '60.2']\nmean IoU: 77.0\n\n[epoch: 24]\ntrain_loss: 0.4465\nlr: 0.008908\ndice coefficient: 0.771\nglobal correct: 94.5\naverage row correct: ['97.6', '73.4']\nIoU: ['93.9', '63.0']\nmean IoU: 78.5\n\n[epoch: 25]\ntrain_loss: 0.4295\nlr: 0.008862\ndice coefficient: 0.770\nglobal correct: 94.5\naverage row correct: ['97.6', '73.1']\nIoU: ['94.0', '62.9']\nmean IoU: 78.4\n\n[epoch: 26]\ntrain_loss: 0.4246\nlr: 0.008816\ndice coefficient: 0.768\nglobal correct: 94.6\naverage row correct: ['98.0', '71.2']\nIoU: ['94.1', '62.6']\nmean IoU: 78.3\n\n[epoch: 27]\ntrain_loss: 0.4180\nlr: 0.008770\ndice coefficient: 0.771\nglobal correct: 94.6\naverage row correct: ['97.9', '72.1']\nIoU: ['94.1', '63.0']\nmean IoU: 78.5\n\n[epoch: 28]\ntrain_loss: 0.4408\nlr: 0.008724\ndice coefficient: 0.775\nglobal correct: 94.5\naverage row correct: ['97.3', '75.3']\nIoU: ['93.9', '63.5']\nmean IoU: 78.7\n\n[epoch: 29]\ntrain_loss: 0.4323\nlr: 0.008678\ndice coefficient: 0.763\nglobal correct: 94.5\naverage row correct: ['97.9', '70.9']\nIoU: ['93.9', '62.0']\nmean IoU: 78.0\n\n[epoch: 30]\ntrain_loss: 0.4144\nlr: 0.008632\ndice coefficient: 0.772\nglobal correct: 94.3\naverage row correct: ['96.8', '76.9']\nIoU: ['93.7', '63.1']\nmean IoU: 78.4\n\n[epoch: 31]\ntrain_loss: 0.4130\nlr: 0.008586\ndice coefficient: 0.776\nglobal correct: 94.6\naverage row correct: ['97.6', '74.0']\nIoU: ['94.0', '63.6']\nmean IoU: 78.8\n\n[epoch: 32]\ntrain_loss: 0.4109\nlr: 0.008540\ndice coefficient: 0.776\nglobal correct: 94.6\naverage row correct: ['97.5', '74.8']\nIoU: ['94.0', '63.6']\nmean IoU: 78.8\n\n[epoch: 33]\ntrain_loss: 0.4190\nlr: 0.008494\ndice coefficient: 0.779\nglobal correct: 94.7\naverage row correct: ['97.6', '74.3']\nIoU: ['94.1', '64.0']\nmean IoU: 79.1\n\n[epoch: 34]\ntrain_loss: 0.4163\nlr: 0.008448\ndice coefficient: 
0.773\nglobal correct: 94.6\naverage row correct: ['97.8', '72.6']\nIoU: ['94.1', '63.2']\nmean IoU: 78.6\n\n[epoch: 35]\ntrain_loss: 0.4064\nlr: 0.008402\ndice coefficient: 0.775\nglobal correct: 94.7\naverage row correct: ['98.0', '72.1']\nIoU: ['94.2', '63.5']\nmean IoU: 78.8\n\n[epoch: 36]\ntrain_loss: 0.3986\nlr: 0.008356\ndice coefficient: 0.785\nglobal correct: 94.7\naverage row correct: ['97.2', '77.1']\nIoU: ['94.1', '64.8']\nmean IoU: 79.4\n\n[epoch: 37]\ntrain_loss: 0.3959\nlr: 0.008310\ndice coefficient: 0.784\nglobal correct: 94.8\naverage row correct: ['97.7', '74.9']\nIoU: ['94.3', '64.7']\nmean IoU: 79.5\n\n[epoch: 38]\ntrain_loss: 0.4058\nlr: 0.008264\ndice coefficient: 0.786\nglobal correct: 94.7\naverage row correct: ['97.4', '76.4']\nIoU: ['94.2', '64.9']\nmean IoU: 79.5\n\n[epoch: 39]\ntrain_loss: 0.3934\nlr: 0.008218\ndice coefficient: 0.786\nglobal correct: 94.8\naverage row correct: ['97.5', '76.2']\nIoU: ['94.2', '64.9']\nmean IoU: 79.5\n\n[epoch: 40]\ntrain_loss: 0.3926\nlr: 0.008171\ndice coefficient: 0.783\nglobal correct: 94.7\naverage row correct: ['97.3', '76.4']\nIoU: ['94.1', '64.6']\nmean IoU: 79.3\n\n[epoch: 41]\ntrain_loss: 0.3880\nlr: 0.008125\ndice coefficient: 0.787\nglobal correct: 94.8\naverage row correct: ['97.6', '75.6']\nIoU: ['94.3', '65.1']\nmean IoU: 79.7\n\n[epoch: 42]\ntrain_loss: 0.3964\nlr: 0.008079\ndice coefficient: 0.788\nglobal correct: 94.8\naverage row correct: ['97.4', '77.1']\nIoU: ['94.2', '65.3']\nmean IoU: 79.7\n\n[epoch: 43]\ntrain_loss: 0.3980\nlr: 0.008032\ndice coefficient: 0.787\nglobal correct: 94.7\naverage row correct: ['97.3', '77.0']\nIoU: ['94.2', '65.1']\nmean IoU: 79.6\n\n[epoch: 44]\ntrain_loss: 0.3846\nlr: 0.007986\ndice coefficient: 0.787\nglobal correct: 94.7\naverage row correct: ['97.2', '77.4']\nIoU: ['94.1', '65.1']\nmean IoU: 79.6\n\n[epoch: 45]\ntrain_loss: 0.3832\nlr: 0.007940\ndice coefficient: 0.783\nglobal correct: 94.7\naverage row correct: ['97.5', '75.6']\nIoU: ['94.2', 
'64.6']\nmean IoU: 79.4\n\n[epoch: 46]\ntrain_loss: 0.3839\nlr: 0.007893\ndice coefficient: 0.789\nglobal correct: 94.9\naverage row correct: ['97.6', '76.1']\nIoU: ['94.3', '65.4']\nmean IoU: 79.8\n\n[epoch: 47]\ntrain_loss: 0.3739\nlr: 0.007847\ndice coefficient: 0.789\nglobal correct: 94.8\naverage row correct: ['97.4', '76.8']\nIoU: ['94.3', '65.4']\nmean IoU: 79.8\n\n[epoch: 48]\ntrain_loss: 0.4064\nlr: 0.007800\ndice coefficient: 0.783\nglobal correct: 94.7\naverage row correct: ['97.4', '76.0']\nIoU: ['94.1', '64.4']\nmean IoU: 79.3\n\n[epoch: 49]\ntrain_loss: 0.3878\nlr: 0.007754\ndice coefficient: 0.787\nglobal correct: 94.7\naverage row correct: ['97.3', '77.1']\nIoU: ['94.2', '65.1']\nmean IoU: 79.6\n\n[epoch: 50]\ntrain_loss: 0.3856\nlr: 0.007707\ndice coefficient: 0.788\nglobal correct: 94.8\naverage row correct: ['97.6', '76.2']\nIoU: ['94.3', '65.3']\nmean IoU: 79.8\n\n[epoch: 51]\ntrain_loss: 0.3883\nlr: 0.007661\ndice coefficient: 0.788\nglobal correct: 94.5\naverage row correct: ['96.5', '80.7']\nIoU: ['93.9', '65.2']\nmean IoU: 79.5\n\n[epoch: 52]\ntrain_loss: 0.3965\nlr: 0.007614\ndice coefficient: 0.791\nglobal correct: 94.8\naverage row correct: ['97.2', '78.1']\nIoU: ['94.2', '65.5']\nmean IoU: 79.9\n\n[epoch: 53]\ntrain_loss: 0.3851\nlr: 0.007567\ndice coefficient: 0.793\nglobal correct: 94.9\naverage row correct: ['97.4', '77.5']\nIoU: ['94.3', '65.9']\nmean IoU: 80.1\n\n[epoch: 54]\ntrain_loss: 0.3859\nlr: 0.007521\ndice coefficient: 0.790\nglobal correct: 94.9\naverage row correct: ['97.6', '76.2']\nIoU: ['94.3', '65.5']\nmean IoU: 79.9\n\n[epoch: 55]\ntrain_loss: 0.3801\nlr: 0.007474\ndice coefficient: 0.790\nglobal correct: 94.8\naverage row correct: ['97.4', '77.2']\nIoU: ['94.3', '65.5']\nmean IoU: 79.9\n\n[epoch: 56]\ntrain_loss: 0.3928\nlr: 0.007427\ndice coefficient: 0.786\nglobal correct: 94.9\naverage row correct: ['98.0', '73.7']\nIoU: ['94.4', '64.9']\nmean IoU: 79.7\n\n[epoch: 57]\ntrain_loss: 0.3930\nlr: 0.007381\ndice 
coefficient: 0.790\nglobal correct: 94.6\naverage row correct: ['96.8', '79.6']\nIoU: ['94.0', '65.4']\nmean IoU: 79.7\n\n[epoch: 58]\ntrain_loss: 0.3738\nlr: 0.007334\ndice coefficient: 0.789\nglobal correct: 94.9\naverage row correct: ['97.8', '75.0']\nIoU: ['94.4', '65.4']\nmean IoU: 79.9\n\n[epoch: 59]\ntrain_loss: 0.3706\nlr: 0.007287\ndice coefficient: 0.795\nglobal correct: 94.8\naverage row correct: ['97.1', '79.5']\nIoU: ['94.2', '66.1']\nmean IoU: 80.2\n\n[epoch: 60]\ntrain_loss: 0.3783\nlr: 0.007240\ndice coefficient: 0.795\nglobal correct: 95.0\naverage row correct: ['97.7', '76.7']\nIoU: ['94.5', '66.2']\nmean IoU: 80.3\n\n[epoch: 61]\ntrain_loss: 0.3656\nlr: 0.007193\ndice coefficient: 0.792\nglobal correct: 95.1\naverage row correct: ['98.0', '74.6']\nIoU: ['94.5', '65.7']\nmean IoU: 80.1\n\n[epoch: 62]\ntrain_loss: 0.3773\nlr: 0.007146\ndice coefficient: 0.796\nglobal correct: 95.0\naverage row correct: ['97.7', '76.6']\nIoU: ['94.5', '66.3']\nmean IoU: 80.4\n\n[epoch: 63]\ntrain_loss: 0.3703\nlr: 0.007099\ndice coefficient: 0.796\nglobal correct: 95.1\naverage row correct: ['97.9', '75.8']\nIoU: ['94.6', '66.2']\nmean IoU: 80.4\n\n[epoch: 64]\ntrain_loss: 0.3630\nlr: 0.007052\ndice coefficient: 0.794\nglobal correct: 94.8\naverage row correct: ['97.1', '79.4']\nIoU: ['94.2', '66.0']\nmean IoU: 80.1\n\n[epoch: 65]\ntrain_loss: 0.3680\nlr: 0.007005\ndice coefficient: 0.797\nglobal correct: 95.1\naverage row correct: ['97.7', '76.8']\nIoU: ['94.5', '66.4']\nmean IoU: 80.5\n\n[epoch: 66]\ntrain_loss: 0.3557\nlr: 0.006958\ndice coefficient: 0.799\nglobal correct: 95.1\naverage row correct: ['97.5', '77.9']\nIoU: ['94.5', '66.7']\nmean IoU: 80.6\n\n[epoch: 67]\ntrain_loss: 0.3759\nlr: 0.006911\ndice coefficient: 0.796\nglobal correct: 94.8\naverage row correct: ['97.0', '80.0']\nIoU: ['94.2', '66.3']\nmean IoU: 80.2\n\n[epoch: 68]\ntrain_loss: 0.3638\nlr: 0.006864\ndice coefficient: 0.790\nglobal correct: 95.0\naverage row correct: ['98.0', '74.3']\nIoU: 
['94.5', '65.4']\nmean IoU: 79.9\n\n[epoch: 69]\ntrain_loss: 0.3540\nlr: 0.006817\ndice coefficient: 0.796\nglobal correct: 94.6\naverage row correct: ['96.3', '83.0']\nIoU: ['94.0', '66.2']\nmean IoU: 80.1\n\n[epoch: 70]\ntrain_loss: 0.3602\nlr: 0.006770\ndice coefficient: 0.798\nglobal correct: 94.9\naverage row correct: ['97.1', '79.7']\nIoU: ['94.3', '66.6']\nmean IoU: 80.4\n\n[epoch: 71]\ntrain_loss: 0.3597\nlr: 0.006722\ndice coefficient: 0.797\nglobal correct: 95.0\naverage row correct: ['97.6', '77.2']\nIoU: ['94.5', '66.5']\nmean IoU: 80.5\n\n[epoch: 72]\ntrain_loss: 0.3618\nlr: 0.006675\ndice coefficient: 0.802\nglobal correct: 95.1\naverage row correct: ['97.5', '78.8']\nIoU: ['94.5', '67.1']\nmean IoU: 80.8\n\n[epoch: 73]\ntrain_loss: 0.3582\nlr: 0.006628\ndice coefficient: 0.803\nglobal correct: 95.1\naverage row correct: ['97.4', '79.3']\nIoU: ['94.5', '67.2']\nmean IoU: 80.9\n\n[epoch: 74]\ntrain_loss: 0.3624\nlr: 0.006580\ndice coefficient: 0.800\nglobal correct: 95.1\naverage row correct: ['97.7', '77.3']\nIoU: ['94.6', '66.8']\nmean IoU: 80.7\n\n[epoch: 75]\ntrain_loss: 0.3648\nlr: 0.006533\ndice coefficient: 0.795\nglobal correct: 95.1\naverage row correct: ['98.2', '74.5']\nIoU: ['94.6', '66.1']\nmean IoU: 80.4\n\n[epoch: 76]\ntrain_loss: 0.3553\nlr: 0.006486\ndice coefficient: 0.801\nglobal correct: 95.0\naverage row correct: ['97.3', '79.6']\nIoU: ['94.4', '67.0']\nmean IoU: 80.7\n\n[epoch: 77]\ntrain_loss: 0.3632\nlr: 0.006438\ndice coefficient: 0.796\nglobal correct: 94.6\naverage row correct: ['96.5', '82.2']\nIoU: ['94.0', '66.1']\nmean IoU: 80.1\n\n[epoch: 78]\ntrain_loss: 0.3511\nlr: 0.006391\ndice coefficient: 0.801\nglobal correct: 95.2\naverage row correct: ['97.9', '76.5']\nIoU: ['94.7', '67.0']\nmean IoU: 80.8\n\n[epoch: 79]\ntrain_loss: 0.3602\nlr: 0.006343\ndice coefficient: 0.803\nglobal correct: 95.2\naverage row correct: ['97.8', '77.4']\nIoU: ['94.7', '67.2']\nmean IoU: 80.9\n\n[epoch: 80]\ntrain_loss: 0.3585\nlr: 
0.006295\ndice coefficient: 0.801\nglobal correct: 94.9\naverage row correct: ['97.0', '80.8']\nIoU: ['94.3', '67.0']\nmean IoU: 80.7\n\n[epoch: 81]\ntrain_loss: 0.3543\nlr: 0.006248\ndice coefficient: 0.802\nglobal correct: 95.1\naverage row correct: ['97.5', '78.6']\nIoU: ['94.6', '67.1']\nmean IoU: 80.8\n\n[epoch: 82]\ntrain_loss: 0.3689\nlr: 0.006200\ndice coefficient: 0.804\nglobal correct: 95.1\naverage row correct: ['97.3', '79.8']\nIoU: ['94.5', '67.4']\nmean IoU: 80.9\n\n[epoch: 83]\ntrain_loss: 0.3588\nlr: 0.006152\ndice coefficient: 0.803\nglobal correct: 94.9\naverage row correct: ['96.9', '81.6']\nIoU: ['94.3', '67.2']\nmean IoU: 80.8\n\n[epoch: 84]\ntrain_loss: 0.3640\nlr: 0.006105\ndice coefficient: 0.798\nglobal correct: 94.9\naverage row correct: ['97.3', '78.9']\nIoU: ['94.4', '66.4']\nmean IoU: 80.4\n\n[epoch: 85]\ntrain_loss: 0.3635\nlr: 0.006057\ndice coefficient: 0.802\nglobal correct: 95.2\naverage row correct: ['97.7', '77.7']\nIoU: ['94.6', '67.1']\nmean IoU: 80.9\n\n[epoch: 86]\ntrain_loss: 0.3441\nlr: 0.006009\ndice coefficient: 0.802\nglobal correct: 95.2\naverage row correct: ['98.0', '76.1']\nIoU: ['94.7', '67.0']\nmean IoU: 80.9\n\n[epoch: 87]\ntrain_loss: 0.3553\nlr: 0.005961\ndice coefficient: 0.806\nglobal correct: 95.1\naverage row correct: ['97.4', '79.8']\nIoU: ['94.6', '67.6']\nmean IoU: 81.1\n\n[epoch: 88]\ntrain_loss: 0.3558\nlr: 0.005913\ndice coefficient: 0.804\nglobal correct: 95.0\naverage row correct: ['97.2', '80.4']\nIoU: ['94.5', '67.4']\nmean IoU: 80.9\n\n[epoch: 89]\ntrain_loss: 0.3638\nlr: 0.005865\ndice coefficient: 0.804\nglobal correct: 95.1\naverage row correct: ['97.6', '78.5']\nIoU: ['94.6', '67.3']\nmean IoU: 81.0\n\n[epoch: 90]\ntrain_loss: 0.3546\nlr: 0.005817\ndice coefficient: 0.804\nglobal correct: 95.1\naverage row correct: ['97.6', '78.5']\nIoU: ['94.6', '67.3']\nmean IoU: 80.9\n\n[epoch: 91]\ntrain_loss: 0.3587\nlr: 0.005769\ndice coefficient: 0.804\nglobal correct: 95.0\naverage row correct: 
['97.0', '81.0']\nIoU: ['94.4', '67.3']\nmean IoU: 80.9\n\n[epoch: 92]\ntrain_loss: 0.3546\nlr: 0.005721\ndice coefficient: 0.804\nglobal correct: 95.0\naverage row correct: ['97.0', '81.2']\nIoU: ['94.4', '67.3']\nmean IoU: 80.9\n\n[epoch: 93]\ntrain_loss: 0.3505\nlr: 0.005673\ndice coefficient: 0.804\nglobal correct: 95.2\naverage row correct: ['97.6', '78.4']\nIoU: ['94.6', '67.3']\nmean IoU: 81.0\n\n[epoch: 94]\ntrain_loss: 0.3545\nlr: 0.005625\ndice coefficient: 0.803\nglobal correct: 95.0\naverage row correct: ['97.2', '80.2']\nIoU: ['94.5', '67.2']\nmean IoU: 80.9\n\n[epoch: 95]\ntrain_loss: 0.3497\nlr: 0.005577\ndice coefficient: 0.806\nglobal correct: 95.2\naverage row correct: ['97.8', '78.0']\nIoU: ['94.7', '67.6']\nmean IoU: 81.1\n\n[epoch: 96]\ntrain_loss: 0.3476\nlr: 0.005528\ndice coefficient: 0.806\nglobal correct: 95.1\naverage row correct: ['97.4', '79.9']\nIoU: ['94.6', '67.7']\nmean IoU: 81.1\n\n[epoch: 97]\ntrain_loss: 0.3479\nlr: 0.005480\ndice coefficient: 0.805\nglobal correct: 95.0\naverage row correct: ['96.9', '81.8']\nIoU: ['94.4', '67.5']\nmean IoU: 81.0\n\n[epoch: 98]\ntrain_loss: 0.3563\nlr: 0.005432\ndice coefficient: 0.807\nglobal correct: 95.1\naverage row correct: ['97.2', '80.6']\nIoU: ['94.6', '67.8']\nmean IoU: 81.2\n\n[epoch: 99]\ntrain_loss: 0.3444\nlr: 0.005383\ndice coefficient: 0.805\nglobal correct: 95.2\naverage row correct: ['97.7', '78.2']\nIoU: ['94.7', '67.5']\nmean IoU: 81.1\n\n[epoch: 100]\ntrain_loss: 0.3419\nlr: 0.005335\ndice coefficient: 0.805\nglobal correct: 95.1\naverage row correct: ['97.2', '80.4']\nIoU: ['94.5', '67.5']\nmean IoU: 81.0\n\n[epoch: 101]\ntrain_loss: 0.3504\nlr: 0.005286\ndice coefficient: 0.807\nglobal correct: 95.0\naverage row correct: ['96.9', '82.1']\nIoU: ['94.4', '67.7']\nmean IoU: 81.1\n\n[epoch: 102]\ntrain_loss: 0.3511\nlr: 0.005238\ndice coefficient: 0.802\nglobal correct: 95.0\naverage row correct: ['97.2', '79.7']\nIoU: ['94.4', '67.0']\nmean IoU: 80.7\n\n[epoch: 
103]\ntrain_loss: 0.3431\nlr: 0.005189\ndice coefficient: 0.802\nglobal correct: 95.2\naverage row correct: ['98.0', '76.5']\nIoU: ['94.7', '67.1']\nmean IoU: 80.9\n\n[epoch: 104]\ntrain_loss: 0.3453\nlr: 0.005140\ndice coefficient: 0.805\nglobal correct: 95.2\naverage row correct: ['97.7', '78.0']\nIoU: ['94.7', '67.5']\nmean IoU: 81.1\n\n[epoch: 105]\ntrain_loss: 0.3475\nlr: 0.005092\ndice coefficient: 0.805\nglobal correct: 95.0\naverage row correct: ['97.0', '81.2']\nIoU: ['94.4', '67.5']\nmean IoU: 81.0\n\n[epoch: 106]\ntrain_loss: 0.3434\nlr: 0.005043\ndice coefficient: 0.806\nglobal correct: 95.1\naverage row correct: ['97.4', '79.7']\nIoU: ['94.6', '67.6']\nmean IoU: 81.1\n\n[epoch: 107]\ntrain_loss: 0.3426\nlr: 0.004994\ndice coefficient: 0.807\nglobal correct: 95.2\naverage row correct: ['97.6', '78.8']\nIoU: ['94.7', '67.8']\nmean IoU: 81.2\n\n[epoch: 108]\ntrain_loss: 0.3372\nlr: 0.004945\ndice coefficient: 0.807\nglobal correct: 95.0\naverage row correct: ['96.9', '82.1']\nIoU: ['94.4', '67.8']\nmean IoU: 81.1\n\n[epoch: 109]\ntrain_loss: 0.3474\nlr: 0.004896\ndice coefficient: 0.805\nglobal correct: 95.2\naverage row correct: ['97.5', '79.1']\nIoU: ['94.6', '67.5']\nmean IoU: 81.1\n\n[epoch: 110]\ntrain_loss: 0.3393\nlr: 0.004847\ndice coefficient: 0.804\nglobal correct: 95.1\naverage row correct: ['97.4', '79.4']\nIoU: ['94.5', '67.3']\nmean IoU: 80.9\n\n[epoch: 111]\ntrain_loss: 0.3381\nlr: 0.004798\ndice coefficient: 0.808\nglobal correct: 95.2\naverage row correct: ['97.4', '79.9']\nIoU: ['94.6', '67.8']\nmean IoU: 81.2\n\n[epoch: 112]\ntrain_loss: 0.3464\nlr: 0.004749\ndice coefficient: 0.808\nglobal correct: 95.2\naverage row correct: ['97.3', '80.4']\nIoU: ['94.6', '68.0']\nmean IoU: 81.3\n\n[epoch: 113]\ntrain_loss: 0.3397\nlr: 0.004700\ndice coefficient: 0.806\nglobal correct: 95.3\naverage row correct: ['97.8', '77.7']\nIoU: ['94.8', '67.7']\nmean IoU: 81.2\n\n[epoch: 114]\ntrain_loss: 0.3409\nlr: 0.004651\ndice coefficient: 0.808\nglobal 
correct: 95.1\naverage row correct: ['97.0', '81.7']\nIoU: ['94.5', '67.9']\nmean IoU: 81.2\n\n[epoch: 115]\ntrain_loss: 0.3396\nlr: 0.004601\ndice coefficient: 0.809\nglobal correct: 95.2\naverage row correct: ['97.5', '79.8']\nIoU: ['94.7', '68.1']\nmean IoU: 81.4\n\n[epoch: 116]\ntrain_loss: 0.3402\nlr: 0.004552\ndice coefficient: 0.810\nglobal correct: 95.3\naverage row correct: ['97.5', '79.8']\nIoU: ['94.7', '68.2']\nmean IoU: 81.5\n\n[epoch: 117]\ntrain_loss: 0.3444\nlr: 0.004503\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['97.0', '82.2']\nIoU: ['94.5', '68.2']\nmean IoU: 81.3\n\n[epoch: 118]\ntrain_loss: 0.3391\nlr: 0.004453\ndice coefficient: 0.809\nglobal correct: 95.1\naverage row correct: ['97.1', '81.7']\nIoU: ['94.5', '68.0']\nmean IoU: 81.3\n\n[epoch: 119]\ntrain_loss: 0.3360\nlr: 0.004404\ndice coefficient: 0.810\nglobal correct: 95.2\naverage row correct: ['97.4', '80.5']\nIoU: ['94.7', '68.2']\nmean IoU: 81.4\n\n[epoch: 120]\ntrain_loss: 0.3418\nlr: 0.004354\ndice coefficient: 0.810\nglobal correct: 95.2\naverage row correct: ['97.3', '80.6']\nIoU: ['94.7', '68.2']\nmean IoU: 81.4\n\n[epoch: 121]\ntrain_loss: 0.3308\nlr: 0.004304\ndice coefficient: 0.809\nglobal correct: 95.1\naverage row correct: ['97.2', '81.1']\nIoU: ['94.6', '68.0']\nmean IoU: 81.3\n\n[epoch: 122]\ntrain_loss: 0.3440\nlr: 0.004255\ndice coefficient: 0.808\nglobal correct: 95.2\naverage row correct: ['97.4', '80.2']\nIoU: ['94.6', '67.9']\nmean IoU: 81.3\n\n[epoch: 123]\ntrain_loss: 0.3344\nlr: 0.004205\ndice coefficient: 0.810\nglobal correct: 95.2\naverage row correct: ['97.3', '80.8']\nIoU: ['94.6', '68.2']\nmean IoU: 81.4\n\n[epoch: 124]\ntrain_loss: 0.3282\nlr: 0.004155\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.4', '80.7']\nIoU: ['94.7', '68.4']\nmean IoU: 81.5\n\n[epoch: 125]\ntrain_loss: 0.3342\nlr: 0.004105\ndice coefficient: 0.809\nglobal correct: 95.2\naverage row correct: ['97.5', '79.7']\nIoU: ['94.7', 
'68.1']\nmean IoU: 81.4\n\n[epoch: 126]\ntrain_loss: 0.3411\nlr: 0.004055\ndice coefficient: 0.809\nglobal correct: 95.2\naverage row correct: ['97.5', '79.7']\nIoU: ['94.7', '68.0']\nmean IoU: 81.4\n\n[epoch: 127]\ntrain_loss: 0.3415\nlr: 0.004005\ndice coefficient: 0.809\nglobal correct: 95.3\naverage row correct: ['97.6', '79.2']\nIoU: ['94.7', '68.0']\nmean IoU: 81.4\n\n[epoch: 128]\ntrain_loss: 0.3360\nlr: 0.003955\ndice coefficient: 0.808\nglobal correct: 95.3\naverage row correct: ['97.8', '78.3']\nIoU: ['94.8', '67.8']\nmean IoU: 81.3\n\n[epoch: 129]\ntrain_loss: 0.3323\nlr: 0.003905\ndice coefficient: 0.808\nglobal correct: 95.0\naverage row correct: ['96.9', '82.2']\nIoU: ['94.5', '67.9']\nmean IoU: 81.2\n\n[epoch: 130]\ntrain_loss: 0.3427\nlr: 0.003855\ndice coefficient: 0.807\nglobal correct: 94.9\naverage row correct: ['96.7', '83.2']\nIoU: ['94.3', '67.7']\nmean IoU: 81.0\n\n[epoch: 131]\ntrain_loss: 0.3402\nlr: 0.003804\ndice coefficient: 0.808\nglobal correct: 95.1\naverage row correct: ['97.0', '82.0']\nIoU: ['94.5', '67.9']\nmean IoU: 81.2\n\n[epoch: 132]\ntrain_loss: 0.3388\nlr: 0.003754\ndice coefficient: 0.809\nglobal correct: 95.2\naverage row correct: ['97.3', '80.7']\nIoU: ['94.6', '68.0']\nmean IoU: 81.3\n\n[epoch: 133]\ntrain_loss: 0.3366\nlr: 0.003704\ndice coefficient: 0.802\nglobal correct: 94.8\naverage row correct: ['96.4', '83.4']\nIoU: ['94.2', '67.1']\nmean IoU: 80.6\n\n[epoch: 134]\ntrain_loss: 0.3347\nlr: 0.003653\ndice coefficient: 0.809\nglobal correct: 95.2\naverage row correct: ['97.3', '80.7']\nIoU: ['94.6', '68.0']\nmean IoU: 81.3\n\n[epoch: 135]\ntrain_loss: 0.3405\nlr: 0.003602\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.2', '81.2']\nIoU: ['94.6', '68.3']\nmean IoU: 81.5\n\n[epoch: 136]\ntrain_loss: 0.3342\nlr: 0.003552\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.3', '81.1']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 137]\ntrain_loss: 0.3309\nlr: 
0.003501\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.5', '80.2']\nIoU: ['94.8', '68.4']\nmean IoU: 81.6\n\n[epoch: 138]\ntrain_loss: 0.3281\nlr: 0.003450\ndice coefficient: 0.810\nglobal correct: 95.3\naverage row correct: ['97.5', '79.8']\nIoU: ['94.7', '68.2']\nmean IoU: 81.5\n\n[epoch: 139]\ntrain_loss: 0.3283\nlr: 0.003399\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.2', '81.6']\nIoU: ['94.6', '68.3']\nmean IoU: 81.5\n\n[epoch: 140]\ntrain_loss: 0.3344\nlr: 0.003348\ndice coefficient: 0.811\nglobal correct: 95.3\naverage row correct: ['97.5', '80.1']\nIoU: ['94.7', '68.4']\nmean IoU: 81.6\n\n[epoch: 141]\ntrain_loss: 0.3331\nlr: 0.003297\ndice coefficient: 0.810\nglobal correct: 95.3\naverage row correct: ['97.7', '78.8']\nIoU: ['94.8', '68.2']\nmean IoU: 81.5\n\n[epoch: 142]\ntrain_loss: 0.3339\nlr: 0.003246\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.3', '81.0']\nIoU: ['94.7', '68.2']\nmean IoU: 81.4\n\n[epoch: 143]\ntrain_loss: 0.3274\nlr: 0.003194\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['97.1', '81.7']\nIoU: ['94.6', '68.1']\nmean IoU: 81.3\n\n[epoch: 144]\ntrain_loss: 0.3410\nlr: 0.003143\ndice coefficient: 0.807\nglobal correct: 94.9\naverage row correct: ['96.3', '84.7']\nIoU: ['94.2', '67.7']\nmean IoU: 81.0\n\n[epoch: 145]\ntrain_loss: 0.3397\nlr: 0.003092\ndice coefficient: 0.802\nglobal correct: 94.7\naverage row correct: ['96.1', '84.9']\nIoU: ['94.0', '67.0']\nmean IoU: 80.5\n\n[epoch: 146]\ntrain_loss: 0.3273\nlr: 0.003040\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 147]\ntrain_loss: 0.3300\nlr: 0.002988\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['96.9', '82.7']\nIoU: ['94.5', '68.2']\nmean IoU: 81.3\n\n[epoch: 148]\ntrain_loss: 0.3318\nlr: 0.002937\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row 
correct: ['97.2', '81.3']\nIoU: ['94.6', '68.3']\nmean IoU: 81.4\n\n[epoch: 149]\ntrain_loss: 0.3350\nlr: 0.002885\ndice coefficient: 0.810\nglobal correct: 95.2\naverage row correct: ['97.5', '80.1']\nIoU: ['94.7', '68.2']\nmean IoU: 81.4\n\n[epoch: 150]\ntrain_loss: 0.3335\nlr: 0.002833\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.4', '80.4']\nIoU: ['94.7', '68.2']\nmean IoU: 81.5\n\n[epoch: 151]\ntrain_loss: 0.3259\nlr: 0.002781\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['97.1', '81.9']\nIoU: ['94.6', '68.2']\nmean IoU: 81.4\n\n[epoch: 152]\ntrain_loss: 0.3304\nlr: 0.002728\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['97.1', '81.6']\nIoU: ['94.6', '68.2']\nmean IoU: 81.4\n\n[epoch: 153]\ntrain_loss: 0.3352\nlr: 0.002676\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.4', '80.6']\nIoU: ['94.7', '68.3']\nmean IoU: 81.5\n\n[epoch: 154]\ntrain_loss: 0.3272\nlr: 0.002624\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.1', '82.0']\nIoU: ['94.6', '68.3']\nmean IoU: 81.5\n\n[epoch: 155]\ntrain_loss: 0.3339\nlr: 0.002571\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.2', '81.6']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 156]\ntrain_loss: 0.3288\nlr: 0.002519\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.4', '80.5']\nIoU: ['94.7', '68.4']\nmean IoU: 81.6\n\n[epoch: 157]\ntrain_loss: 0.3247\nlr: 0.002466\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.3', '81.0']\nIoU: ['94.7', '68.4']\nmean IoU: 81.6\n\n[epoch: 158]\ntrain_loss: 0.3381\nlr: 0.002413\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 159]\ntrain_loss: 0.3318\nlr: 0.002360\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.0', '82.8']\nIoU: ['94.6', '68.5']\nmean IoU: 
81.5\n\n[epoch: 160]\ntrain_loss: 0.3281\nlr: 0.002307\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.3', '81.1']\nIoU: ['94.7', '68.6']\nmean IoU: 81.7\n\n[epoch: 161]\ntrain_loss: 0.3322\nlr: 0.002253\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.4', '80.9']\nIoU: ['94.7', '68.6']\nmean IoU: 81.7\n\n[epoch: 162]\ntrain_loss: 0.3288\nlr: 0.002200\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.1', '81.8']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 163]\ntrain_loss: 0.3301\nlr: 0.002146\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.4', '80.5']\nIoU: ['94.7', '68.3']\nmean IoU: 81.5\n\n[epoch: 164]\ntrain_loss: 0.3272\nlr: 0.002093\ndice coefficient: 0.809\nglobal correct: 95.3\naverage row correct: ['97.7', '78.6']\nIoU: ['94.8', '68.0']\nmean IoU: 81.4\n\n[epoch: 165]\ntrain_loss: 0.3313\nlr: 0.002039\ndice coefficient: 0.811\nglobal correct: 95.3\naverage row correct: ['97.6', '79.8']\nIoU: ['94.8', '68.3']\nmean IoU: 81.6\n\n[epoch: 166]\ntrain_loss: 0.3281\nlr: 0.001985\ndice coefficient: 0.811\nglobal correct: 95.2\naverage row correct: ['97.1', '81.8']\nIoU: ['94.6', '68.3']\nmean IoU: 81.5\n\n[epoch: 167]\ntrain_loss: 0.3335\nlr: 0.001930\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '82.0']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 168]\ntrain_loss: 0.3280\nlr: 0.001876\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.3', '81.2']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 169]\ntrain_loss: 0.3346\nlr: 0.001822\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.4', '81.0']\nIoU: ['94.8', '68.6']\nmean IoU: 81.7\n\n[epoch: 170]\ntrain_loss: 0.3314\nlr: 0.001767\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.1', '82.1']\nIoU: ['94.6', '68.6']\nmean IoU: 81.6\n\n[epoch: 171]\ntrain_loss: 0.3287\nlr: 0.001712\ndice coefficient: 
0.813\nglobal correct: 95.2\naverage row correct: ['97.1', '82.1']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 172]\ntrain_loss: 0.3258\nlr: 0.001657\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.1', '82.0']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 173]\ntrain_loss: 0.3413\nlr: 0.001601\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.3', '81.3']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 174]\ntrain_loss: 0.3314\nlr: 0.001546\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.0', '82.6']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 175]\ntrain_loss: 0.3314\nlr: 0.001490\ndice coefficient: 0.812\nglobal correct: 95.1\naverage row correct: ['96.9', '83.0']\nIoU: ['94.6', '68.5']\nmean IoU: 81.5\n\n[epoch: 176]\ntrain_loss: 0.3302\nlr: 0.001434\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.1', '82.0']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 177]\ntrain_loss: 0.3378\nlr: 0.001378\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.4', '81.0']\nIoU: ['94.7', '68.6']\nmean IoU: 81.7\n\n[epoch: 178]\ntrain_loss: 0.3316\nlr: 0.001321\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.3', '81.0']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 179]\ntrain_loss: 0.3241\nlr: 0.001265\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.2', '81.5']\nIoU: ['94.7', '68.4']\nmean IoU: 81.5\n\n[epoch: 180]\ntrain_loss: 0.3229\nlr: 0.001208\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['96.9', '82.8']\nIoU: ['94.5', '68.2']\nmean IoU: 81.3\n\n[epoch: 181]\ntrain_loss: 0.3339\nlr: 0.001150\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['96.8', '83.2']\nIoU: ['94.5', '68.1']\nmean IoU: 81.3\n\n[epoch: 182]\ntrain_loss: 0.3231\nlr: 0.001093\ndice coefficient: 0.810\nglobal correct: 95.1\naverage row correct: ['96.9', '82.8']\nIoU: 
['94.5', '68.2']\nmean IoU: 81.4\n\n[epoch: 183]\ntrain_loss: 0.3320\nlr: 0.001035\ndice coefficient: 0.811\nglobal correct: 95.1\naverage row correct: ['96.9', '82.6']\nIoU: ['94.5', '68.3']\nmean IoU: 81.4\n\n[epoch: 184]\ntrain_loss: 0.3238\nlr: 0.000976\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.6', '68.5']\nmean IoU: 81.6\n\n[epoch: 185]\ntrain_loss: 0.3318\nlr: 0.000917\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.3', '81.0']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 186]\ntrain_loss: 0.3272\nlr: 0.000858\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.4', '80.5']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 187]\ntrain_loss: 0.3309\nlr: 0.000799\ndice coefficient: 0.812\nglobal correct: 95.3\naverage row correct: ['97.4', '80.8']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 188]\ntrain_loss: 0.3290\nlr: 0.000738\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.3', '81.3']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 189]\ntrain_loss: 0.3338\nlr: 0.000678\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.1', '82.1']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 190]\ntrain_loss: 0.3240\nlr: 0.000616\ndice coefficient: 0.812\nglobal correct: 95.2\naverage row correct: ['97.1', '82.0']\nIoU: ['94.6', '68.4']\nmean IoU: 81.5\n\n[epoch: 191]\ntrain_loss: 0.3227\nlr: 0.000554\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 192]\ntrain_loss: 0.3224\nlr: 0.000492\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.3', '81.4']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 193]\ntrain_loss: 0.3254\nlr: 0.000428\ndice coefficient: 0.813\nglobal correct: 95.3\naverage row correct: ['97.3', '81.3']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 194]\ntrain_loss: 0.3269\nlr: 
0.000363\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.3', '81.4']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 195]\ntrain_loss: 0.3352\nlr: 0.000297\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.9']\nIoU: ['94.7', '68.5']\nmean IoU: 81.6\n\n[epoch: 196]\ntrain_loss: 0.3217\nlr: 0.000229\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 197]\ntrain_loss: 0.3253\nlr: 0.000159\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.7']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 198]\ntrain_loss: 0.3281\nlr: 0.000085\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.6']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n[epoch: 199]\ntrain_loss: 0.3214\nlr: 0.000000\ndice coefficient: 0.813\nglobal correct: 95.2\naverage row correct: ['97.2', '81.5']\nIoU: ['94.7', '68.6']\nmean IoU: 81.6\n\n"
  },
  {
    "path": "pytorch_segmentation/unet/src/__init__.py",
    "content": "from .unet import UNet\nfrom .mobilenet_unet import MobileV3Unet\nfrom .vgg_unet import VGG16UNet\n"
  },
  {
    "path": "pytorch_segmentation/unet/src/mobilenet_unet.py",
    "content": "from collections import OrderedDict\nfrom typing import Dict\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch import Tensor\nfrom torchvision.models import mobilenet_v3_large\nfrom .unet import Up, OutConv\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n\n    Args:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    _version = 2\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n\n        # 重新构建backbone，将没有使用到的模块全部删掉\n        layers = OrderedDict()\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        
super(IntermediateLayerGetter, self).__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        out = OrderedDict()\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass MobileV3Unet(nn.Module):\n    def __init__(self, num_classes, pretrain_backbone: bool = False):\n        super(MobileV3Unet, self).__init__()\n        backbone = mobilenet_v3_large(pretrained=pretrain_backbone)\n\n        # if pretrain_backbone:\n        #     # 载入mobilenetv3 large backbone预训练权重\n        #     # https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth\n        #     backbone.load_state_dict(torch.load(\"mobilenet_v3_large.pth\", map_location='cpu'))\n\n        backbone = backbone.features\n\n        stage_indices = [1, 3, 6, 12, 15]\n        self.stage_out_channels = [backbone[i].out_channels for i in stage_indices]\n        return_layers = dict([(str(j), f\"stage{i}\") for i, j in enumerate(stage_indices)])\n        self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n        c = self.stage_out_channels[4] + self.stage_out_channels[3]\n        self.up1 = Up(c, self.stage_out_channels[3])\n        c = self.stage_out_channels[3] + self.stage_out_channels[2]\n        self.up2 = Up(c, self.stage_out_channels[2])\n        c = self.stage_out_channels[2] + self.stage_out_channels[1]\n        self.up3 = Up(c, self.stage_out_channels[1])\n        c = self.stage_out_channels[1] + self.stage_out_channels[0]\n        self.up4 = Up(c, self.stage_out_channels[0])\n        self.conv = OutConv(self.stage_out_channels[0], num_classes=num_classes)\n\n    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:\n        input_shape = x.shape[-2:]\n        backbone_out = self.backbone(x)\n        x = 
self.up1(backbone_out['stage4'], backbone_out['stage3'])\n        x = self.up2(x, backbone_out['stage2'])\n        x = self.up3(x, backbone_out['stage1'])\n        x = self.up4(x, backbone_out['stage0'])\n        x = self.conv(x)\n        x = F.interpolate(x, size=input_shape, mode=\"bilinear\", align_corners=False)\n\n        return {\"out\": x}\n"
  },
  {
    "path": "pytorch_segmentation/unet/src/unet.py",
    "content": "from typing import Dict\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass DoubleConv(nn.Sequential):\n    def __init__(self, in_channels, out_channels, mid_channels=None):\n        if mid_channels is None:\n            mid_channels = out_channels\n        super(DoubleConv, self).__init__(\n            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),\n            nn.BatchNorm2d(mid_channels),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),\n            nn.BatchNorm2d(out_channels),\n            nn.ReLU(inplace=True)\n        )\n\n\nclass Down(nn.Sequential):\n    def __init__(self, in_channels, out_channels):\n        super(Down, self).__init__(\n            nn.MaxPool2d(2, stride=2),\n            DoubleConv(in_channels, out_channels)\n        )\n\n\nclass Up(nn.Module):\n    def __init__(self, in_channels, out_channels, bilinear=True):\n        super(Up, self).__init__()\n        if bilinear:\n            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)\n            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)\n        else:\n            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)\n            self.conv = DoubleConv(in_channels, out_channels)\n\n    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:\n        x1 = self.up(x1)\n        # [N, C, H, W]\n        diff_y = x2.size()[2] - x1.size()[2]\n        diff_x = x2.size()[3] - x1.size()[3]\n\n        # padding_left, padding_right, padding_top, padding_bottom\n        x1 = F.pad(x1, [diff_x // 2, diff_x - diff_x // 2,\n                        diff_y // 2, diff_y - diff_y // 2])\n\n        x = torch.cat([x2, x1], dim=1)\n        x = self.conv(x)\n        return x\n\n\nclass OutConv(nn.Sequential):\n    def __init__(self, in_channels, num_classes):\n        
super(OutConv, self).__init__(\n            nn.Conv2d(in_channels, num_classes, kernel_size=1)\n        )\n\n\nclass UNet(nn.Module):\n    def __init__(self,\n                 in_channels: int = 1,\n                 num_classes: int = 2,\n                 bilinear: bool = True,\n                 base_c: int = 64):\n        super(UNet, self).__init__()\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.bilinear = bilinear\n\n        self.in_conv = DoubleConv(in_channels, base_c)\n        self.down1 = Down(base_c, base_c * 2)\n        self.down2 = Down(base_c * 2, base_c * 4)\n        self.down3 = Down(base_c * 4, base_c * 8)\n        factor = 2 if bilinear else 1\n        self.down4 = Down(base_c * 8, base_c * 16 // factor)\n        self.up1 = Up(base_c * 16, base_c * 8 // factor, bilinear)\n        self.up2 = Up(base_c * 8, base_c * 4 // factor, bilinear)\n        self.up3 = Up(base_c * 4, base_c * 2 // factor, bilinear)\n        self.up4 = Up(base_c * 2, base_c, bilinear)\n        self.out_conv = OutConv(base_c, num_classes)\n\n    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:\n        x1 = self.in_conv(x)\n        x2 = self.down1(x1)\n        x3 = self.down2(x2)\n        x4 = self.down3(x3)\n        x5 = self.down4(x4)\n        x = self.up1(x5, x4)\n        x = self.up2(x, x3)\n        x = self.up3(x, x2)\n        x = self.up4(x, x1)\n        logits = self.out_conv(x)\n\n        return {\"out\": logits}\n"
  },
  {
    "path": "pytorch_segmentation/unet/src/vgg_unet.py",
    "content": "from collections import OrderedDict\nfrom typing import Dict\n\nimport torch\nimport torch.nn as nn\nfrom torch import Tensor\nfrom torchvision.models import vgg16_bn\nfrom .unet import Up, OutConv\n\n\nclass IntermediateLayerGetter(nn.ModuleDict):\n    \"\"\"\n    Module wrapper that returns intermediate layers from a model\n\n    It has a strong assumption that the modules have been registered\n    into the model in the same order as they are used.\n    This means that one should **not** reuse the same nn.Module\n    twice in the forward if you want this to work.\n\n    Additionally, it is only able to query submodules that are directly\n    assigned to the model. So if `model` is passed, `model.feature1` can\n    be returned, but not `model.feature1.layer2`.\n\n    Args:\n        model (nn.Module): model on which we will extract the features\n        return_layers (Dict[name, new_name]): a dict containing the names\n            of the modules for which the activations will be returned as\n            the key of the dict, and the value of the dict is the name\n            of the returned activation (which the user can specify).\n    \"\"\"\n    _version = 2\n    __annotations__ = {\n        \"return_layers\": Dict[str, str],\n    }\n\n    def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:\n        if not set(return_layers).issubset([name for name, _ in model.named_children()]):\n            raise ValueError(\"return_layers are not present in model\")\n        orig_return_layers = return_layers\n        return_layers = {str(k): str(v) for k, v in return_layers.items()}\n\n        # 重新构建backbone，将没有使用到的模块全部删掉\n        layers = OrderedDict()\n        for name, module in model.named_children():\n            layers[name] = module\n            if name in return_layers:\n                del return_layers[name]\n            if not return_layers:\n                break\n\n        super(IntermediateLayerGetter, 
self).__init__(layers)\n        self.return_layers = orig_return_layers\n\n    def forward(self, x: Tensor) -> Dict[str, Tensor]:\n        out = OrderedDict()\n        for name, module in self.items():\n            x = module(x)\n            if name in self.return_layers:\n                out_name = self.return_layers[name]\n                out[out_name] = x\n        return out\n\n\nclass VGG16UNet(nn.Module):\n    def __init__(self, num_classes, pretrain_backbone: bool = False):\n        super(VGG16UNet, self).__init__()\n        backbone = vgg16_bn(pretrained=pretrain_backbone)\n\n        # if pretrain_backbone:\n        #     # 载入vgg16_bn预训练权重\n        #     # https://download.pytorch.org/models/vgg16_bn-6c64b313.pth\n        #     backbone.load_state_dict(torch.load(\"vgg16_bn.pth\", map_location='cpu'))\n\n        backbone = backbone.features\n\n        stage_indices = [5, 12, 22, 32, 42]\n        self.stage_out_channels = [64, 128, 256, 512, 512]\n        return_layers = dict([(str(j), f\"stage{i}\") for i, j in enumerate(stage_indices)])\n        self.backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)\n\n        c = self.stage_out_channels[4] + self.stage_out_channels[3]\n        self.up1 = Up(c, self.stage_out_channels[3])\n        c = self.stage_out_channels[3] + self.stage_out_channels[2]\n        self.up2 = Up(c, self.stage_out_channels[2])\n        c = self.stage_out_channels[2] + self.stage_out_channels[1]\n        self.up3 = Up(c, self.stage_out_channels[1])\n        c = self.stage_out_channels[1] + self.stage_out_channels[0]\n        self.up4 = Up(c, self.stage_out_channels[0])\n        self.conv = OutConv(self.stage_out_channels[0], num_classes=num_classes)\n\n    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:\n        backbone_out = self.backbone(x)\n        x = self.up1(backbone_out['stage4'], backbone_out['stage3'])\n        x = self.up2(x, backbone_out['stage2'])\n        x = self.up3(x, 
backbone_out['stage1'])\n        x = self.up4(x, backbone_out['stage0'])\n        x = self.conv(x)\n\n        return {\"out\": x}\n"
  },
  {
    "path": "pytorch_segmentation/unet/train.py",
    "content": "import os\nimport time\nimport datetime\n\nimport torch\n\nfrom src import UNet\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler\nfrom my_dataset import DriveDataset\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, vflip_prob=0.5,\n                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(1.2 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        if vflip_prob > 0:\n            trans.append(T.RandomVerticalFlip(vflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n    base_size = 565\n    crop_size = 480\n\n    if train:\n        return SegmentationPresetTrain(base_size, crop_size, mean=mean, std=std)\n    else:\n        return SegmentationPresetEval(mean=mean, std=std)\n\n\ndef create_model(num_classes):\n    model = UNet(in_channels=3, num_classes=num_classes, base_c=32)\n    return model\n\n\ndef main(args):\n    device = torch.device(args.device if torch.cuda.is_available() else \"cpu\")\n    batch_size = args.batch_size\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 1\n\n    # using 
compute_mean_std.py\n    mean = (0.709, 0.381, 0.224)\n    std = (0.127, 0.079, 0.043)\n\n    # 用来保存训练以及验证过程中信息\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    train_dataset = DriveDataset(args.data_path,\n                                 train=True,\n                                 transforms=get_transform(train=True, mean=mean, std=std))\n\n    val_dataset = DriveDataset(args.data_path,\n                               train=False,\n                               transforms=get_transform(train=False, mean=mean, std=std))\n\n    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])\n    train_loader = torch.utils.data.DataLoader(train_dataset,\n                                               batch_size=batch_size,\n                                               num_workers=num_workers,\n                                               shuffle=True,\n                                               pin_memory=True,\n                                               collate_fn=train_dataset.collate_fn)\n\n    val_loader = torch.utils.data.DataLoader(val_dataset,\n                                             batch_size=1,\n                                             num_workers=num_workers,\n                                             pin_memory=True,\n                                             collate_fn=val_dataset.collate_fn)\n\n    model = create_model(num_classes=num_classes)\n    model.to(device)\n\n    params_to_optimize = [p for p in model.parameters() if p.requires_grad]\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay\n    )\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_loader), args.epochs, warmup=True)\n\n    if args.resume:\n        checkpoint = 
torch.load(args.resume, map_location='cpu')\n        model.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    best_dice = 0.\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        mean_loss, lr = train_one_epoch(model, optimizer, train_loader, device, epoch, num_classes,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat, dice = evaluate(model, val_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        print(f\"dice coefficient: {dice:.3f}\")\n        # write into txt\n        with open(results_file, \"a\") as f:\n            # 记录每个epoch对应的train_loss、lr以及验证集各指标\n            train_info = f\"[epoch: {epoch}]\\n\" \\\n                         f\"train_loss: {mean_loss:.4f}\\n\" \\\n                         f\"lr: {lr:.6f}\\n\" \\\n                         f\"dice coefficient: {dice:.3f}\\n\"\n            f.write(train_info + val_info + \"\\n\\n\")\n\n        if args.save_best is True:\n            if best_dice < dice:\n                best_dice = dice\n            else:\n                continue\n\n        save_file = {\"model\": model.state_dict(),\n                     \"optimizer\": optimizer.state_dict(),\n                     \"lr_scheduler\": lr_scheduler.state_dict(),\n                     \"epoch\": epoch,\n                     \"args\": args}\n        if args.amp:\n            save_file[\"scaler\"] = scaler.state_dict()\n\n        if args.save_best is True:\n            torch.save(save_file, \"save_weights/best_model.pth\")\n        else:\n            torch.save(save_file, \"save_weights/model_{}.pth\".format(epoch))\n\n    total_time 
= time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print(\"training time {}\".format(total_time_str))\n\n\ndef parse_args():\n    import argparse\n    parser = argparse.ArgumentParser(description=\"pytorch unet training\")\n\n    parser.add_argument(\"--data-path\", default=\"./\", help=\"DRIVE root\")\n    # exclude background\n    parser.add_argument(\"--num-classes\", default=1, type=int)\n    parser.add_argument(\"--device\", default=\"cuda\", help=\"training device\")\n    parser.add_argument(\"-b\", \"--batch-size\", default=4, type=int)\n    parser.add_argument(\"--epochs\", default=200, type=int, metavar=\"N\",\n                        help=\"number of total epochs to train\")\n\n    parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate')\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n                        help='start epoch')\n    parser.add_argument('--save-best', default=True, type=bool, help='only save best dice weights')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if not os.path.exists(\"./save_weights\"):\n        os.mkdir(\"./save_weights\")\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/unet/train_multi_GPU.py",
    "content": "import time\nimport os\nimport datetime\n\nimport torch\n\nfrom src import UNet\nfrom train_utils import train_one_epoch, evaluate, create_lr_scheduler, init_distributed_mode, save_on_master, mkdir\nfrom my_dataset import DriveDataset\nimport transforms as T\n\n\nclass SegmentationPresetTrain:\n    def __init__(self, base_size, crop_size, hflip_prob=0.5, vflip_prob=0.5,\n                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        min_size = int(0.5 * base_size)\n        max_size = int(1.2 * base_size)\n\n        trans = [T.RandomResize(min_size, max_size)]\n        if hflip_prob > 0:\n            trans.append(T.RandomHorizontalFlip(hflip_prob))\n        if vflip_prob > 0:\n            trans.append(T.RandomVerticalFlip(vflip_prob))\n        trans.extend([\n            T.RandomCrop(crop_size),\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n        self.transforms = T.Compose(trans)\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\nclass SegmentationPresetEval:\n    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n        self.transforms = T.Compose([\n            T.ToTensor(),\n            T.Normalize(mean=mean, std=std),\n        ])\n\n    def __call__(self, img, target):\n        return self.transforms(img, target)\n\n\ndef get_transform(train, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):\n    base_size = 565\n    crop_size = 480\n\n    if train:\n        return SegmentationPresetTrain(base_size, crop_size, mean=mean, std=std)\n    else:\n        return SegmentationPresetEval(mean=mean, std=std)\n\n\ndef create_model(num_classes):\n    model = UNet(in_channels=3, num_classes=num_classes, base_c=32)\n    return model\n\n\ndef main(args):\n    init_distributed_mode(args)\n    print(args)\n\n    device = torch.device(args.device)\n    # segmentation nun_classes + background\n    num_classes = args.num_classes + 
1\n\n    mean = (0.709, 0.381, 0.224)\n    std = (0.127, 0.079, 0.043)\n\n    # 用来保存coco_info的文件\n    results_file = \"results{}.txt\".format(datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\"))\n\n    data_root = args.data_path\n    # check data root\n    if os.path.exists(os.path.join(data_root, \"DRIVE\")) is False:\n        raise FileNotFoundError(\"DRIVE dose not in path:'{}'.\".format(data_root))\n\n    train_dataset = DriveDataset(args.data_path,\n                                 train=True,\n                                 transforms=get_transform(train=True, mean=mean, std=std))\n\n    val_dataset = DriveDataset(args.data_path,\n                               train=False,\n                               transforms=get_transform(train=False, mean=mean, std=std))\n\n    print(\"Creating data loaders\")\n    if args.distributed:\n        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)\n        test_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)\n    else:\n        train_sampler = torch.utils.data.RandomSampler(train_dataset)\n        test_sampler = torch.utils.data.SequentialSampler(val_dataset)\n\n    train_data_loader = torch.utils.data.DataLoader(\n        train_dataset, batch_size=args.batch_size,\n        sampler=train_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn, drop_last=True)\n\n    val_data_loader = torch.utils.data.DataLoader(\n        val_dataset, batch_size=1,\n        sampler=test_sampler, num_workers=args.workers,\n        collate_fn=train_dataset.collate_fn)\n\n    print(\"Creating model\")\n    # create model num_classes equal background + foreground classes\n    model = create_model(num_classes=num_classes)\n    model.to(device)\n\n    if args.sync_bn:\n        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)\n\n    model_without_ddp = model\n    if args.distributed:\n        model = torch.nn.parallel.DistributedDataParallel(model, 
device_ids=[args.gpu])\n        model_without_ddp = model.module\n\n    params_to_optimize = [p for p in model_without_ddp.parameters() if p.requires_grad]\n\n    optimizer = torch.optim.SGD(\n        params_to_optimize,\n        lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)\n\n    scaler = torch.cuda.amp.GradScaler() if args.amp else None\n\n    # 创建学习率更新策略，这里是每个step更新一次(不是每个epoch)\n    lr_scheduler = create_lr_scheduler(optimizer, len(train_data_loader), args.epochs, warmup=True)\n\n    # 如果传入resume参数，即上次训练的权重地址，则接着上次的参数训练\n    if args.resume:\n        # If map_location is missing, torch.load will first load the module to CPU\n        # and then copy each parameter to where it was saved,\n        # which would result in all processes on the same machine using the same set of devices.\n        checkpoint = torch.load(args.resume, map_location='cpu')  # 读取之前保存的权重文件(包括优化器以及学习率策略)\n        model_without_ddp.load_state_dict(checkpoint['model'])\n        optimizer.load_state_dict(checkpoint['optimizer'])\n        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])\n        args.start_epoch = checkpoint['epoch'] + 1\n        if args.amp:\n            scaler.load_state_dict(checkpoint[\"scaler\"])\n\n    if args.test_only:\n        confmat = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n        print(val_info)\n        return\n\n    best_dice = 0.\n    print(\"Start training\")\n    start_time = time.time()\n    for epoch in range(args.start_epoch, args.epochs):\n        if args.distributed:\n            train_sampler.set_epoch(epoch)\n        mean_loss, lr = train_one_epoch(model, optimizer, train_data_loader, device, epoch, num_classes,\n                                        lr_scheduler=lr_scheduler, print_freq=args.print_freq, scaler=scaler)\n\n        confmat, dice = evaluate(model, val_data_loader, device=device, num_classes=num_classes)\n        val_info = str(confmat)\n      
  print(val_info)\n        print(f\"dice coefficient: {dice:.3f}\")\n\n        # 只在主进程上进行写操作\n        if args.rank in [-1, 0]:\n            # write into txt\n            with open(results_file, \"a\") as f:\n                # 记录每个epoch对应的train_loss、lr以及验证集各指标\n                train_info = f\"[epoch: {epoch}]\\n\" \\\n                             f\"train_loss: {mean_loss:.4f}\\n\" \\\n                             f\"lr: {lr:.6f}\\n\" \\\n                             f\"dice coefficient: {dice:.3f}\\n\"\n                f.write(train_info + val_info + \"\\n\\n\")\n\n        if args.save_best is True:\n            if best_dice < dice:\n                best_dice = dice\n            else:\n                continue\n\n        if args.output_dir:\n            # 只在主节点上执行保存权重操作\n            save_file = {'model': model_without_ddp.state_dict(),\n                         'optimizer': optimizer.state_dict(),\n                         'lr_scheduler': lr_scheduler.state_dict(),\n                         'args': args,\n                         'epoch': epoch}\n            if args.amp:\n                save_file[\"scaler\"] = scaler.state_dict()\n\n            if args.save_best is True:\n                save_on_master(save_file,\n                               os.path.join(args.output_dir, 'best_model.pth'))\n            else:\n                save_on_master(save_file,\n                               os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))\n\n    total_time = time.time() - start_time\n    total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n    print('Training time {}'.format(total_time_str))\n\n\nif __name__ == \"__main__\":\n    import argparse\n\n    parser = argparse.ArgumentParser(\n        description=__doc__)\n\n    # 训练文件的根目录(DRIVE)\n    parser.add_argument('--data-path', default='./', help='dataset')\n    # 训练设备类型\n    parser.add_argument('--device', default='cuda', help='device')\n    # 检测目标类别数(不包含背景)\n    
parser.add_argument('--num-classes', default=1, type=int, help='num_classes')\n    # 每块GPU上的batch_size\n    parser.add_argument('-b', '--batch-size', default=4, type=int,\n                        help='images per gpu, the total batch size is $NGPU x batch_size')\n    # 指定接着从哪个epoch数开始训练\n    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')\n    # 训练的总epoch数\n    parser.add_argument('--epochs', default=200, type=int, metavar='N',\n                        help='number of total epochs to run')\n    # 是否使用同步BN(在多个GPU之间同步)，默认不开启，开启后训练速度会变慢\n    parser.add_argument('--sync_bn', type=bool, default=False, help='whether using SyncBatchNorm')\n    # 数据加载以及预处理的线程数\n    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',\n                        help='number of data loading workers (default: 4)')\n    # 训练学习率，这里默认设置成0.01(使用n块GPU建议乘以n)，如果效果不好可以尝试修改学习率\n    parser.add_argument('--lr', default=0.01, type=float,\n                        help='initial learning rate')\n    # SGD的momentum参数\n    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',\n                        help='momentum')\n    # SGD的weight_decay参数\n    parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,\n                        metavar='W', help='weight decay (default: 1e-4)',\n                        dest='weight_decay')\n    # 只保存dice coefficient值最高的权重\n    parser.add_argument('--save-best', default=True, type=bool, help='only save best weights')\n    # 训练过程打印信息的频率\n    parser.add_argument('--print-freq', default=1, type=int, help='print frequency')\n    # 文件保存地址\n    parser.add_argument('--output-dir', default='./multi_train', help='path where to save')\n    # 基于上次的训练结果接着训练\n    parser.add_argument('--resume', default='', help='resume from checkpoint')\n    # 不训练，仅测试\n    parser.add_argument(\n        \"--test-only\",\n        dest=\"test_only\",\n        help=\"Only test the model\",\n        action=\"store_true\",\n    )\n\n 
   # 分布式进程数\n    parser.add_argument('--world-size', default=1, type=int,\n                        help='number of distributed processes')\n    parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')\n    # Mixed precision training parameters\n    parser.add_argument(\"--amp\", default=False, type=bool,\n                        help=\"Use torch.cuda.amp for mixed precision training\")\n\n    args = parser.parse_args()\n\n    # 如果指定了保存文件地址，检查文件夹是否存在，若不存在，则创建\n    if args.output_dir:\n        mkdir(args.output_dir)\n\n    main(args)\n"
  },
  {
    "path": "pytorch_segmentation/unet/train_utils/__init__.py",
    "content": "from .train_and_eval import train_one_epoch, evaluate, create_lr_scheduler\nfrom .distributed_utils import init_distributed_mode, save_on_master, mkdir\n"
  },
  {
    "path": "pytorch_segmentation/unet/train_utils/dice_coefficient_loss.py",
    "content": "import torch\nimport torch.nn as nn\n\n\ndef build_target(target: torch.Tensor, num_classes: int = 2, ignore_index: int = -100):\n    \"\"\"build target for dice coefficient\"\"\"\n    dice_target = target.clone()\n    if ignore_index >= 0:\n        ignore_mask = torch.eq(target, ignore_index)\n        dice_target[ignore_mask] = 0\n        # [N, H, W] -> [N, H, W, C]\n        dice_target = nn.functional.one_hot(dice_target, num_classes).float()\n        dice_target[ignore_mask] = ignore_index\n    else:\n        dice_target = nn.functional.one_hot(dice_target, num_classes).float()\n\n    return dice_target.permute(0, 3, 1, 2)\n\n\ndef dice_coeff(x: torch.Tensor, target: torch.Tensor, ignore_index: int = -100, epsilon=1e-6):\n    # Average of Dice coefficient for all batches, or for a single mask\n    # 计算一个batch中所有图片某个类别的dice_coefficient\n    d = 0.\n    batch_size = x.shape[0]\n    for i in range(batch_size):\n        x_i = x[i].reshape(-1)\n        t_i = target[i].reshape(-1)\n        if ignore_index >= 0:\n            # 找出mask中不为ignore_index的区域\n            roi_mask = torch.ne(t_i, ignore_index)\n            x_i = x_i[roi_mask]\n            t_i = t_i[roi_mask]\n        inter = torch.dot(x_i, t_i)\n        sets_sum = torch.sum(x_i) + torch.sum(t_i)\n        if sets_sum == 0:\n            sets_sum = 2 * inter\n\n        d += (2 * inter + epsilon) / (sets_sum + epsilon)\n\n    return d / batch_size\n\n\ndef multiclass_dice_coeff(x: torch.Tensor, target: torch.Tensor, ignore_index: int = -100, epsilon=1e-6):\n    \"\"\"Average of Dice coefficient for all classes\"\"\"\n    dice = 0.\n    for channel in range(x.shape[1]):\n        dice += dice_coeff(x[:, channel, ...], target[:, channel, ...], ignore_index, epsilon)\n\n    return dice / x.shape[1]\n\n\ndef dice_loss(x: torch.Tensor, target: torch.Tensor, multiclass: bool = False, ignore_index: int = -100):\n    # Dice loss (objective to minimize) between 0 and 1\n    x = nn.functional.softmax(x, 
dim=1)\n    fn = multiclass_dice_coeff if multiclass else dice_coeff\n    return 1 - fn(x, target, ignore_index=ignore_index)\n"
  },
  {
    "path": "pytorch_segmentation/unet/train_utils/distributed_utils.py",
    "content": "from collections import defaultdict, deque\nimport datetime\nimport time\nimport torch\nimport torch.nn.functional as F\nimport torch.distributed as dist\n\nimport errno\nimport os\n\nfrom .dice_coefficient_loss import multiclass_dice_coeff, build_target\n\n\nclass SmoothedValue(object):\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=20, fmt=None):\n        if fmt is None:\n            fmt = \"{value:.4f} ({global_avg:.4f})\"\n        self.deque = deque(maxlen=window_size)\n        self.total = 0.0\n        self.count = 0\n        self.fmt = fmt\n\n    def update(self, value, n=1):\n        self.deque.append(value)\n        self.count += n\n        self.total += value * n\n\n    def synchronize_between_processes(self):\n        \"\"\"\n        Warning: does not synchronize the deque!\n        \"\"\"\n        if not is_dist_avail_and_initialized():\n            return\n        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')\n        dist.barrier()\n        dist.all_reduce(t)\n        t = t.tolist()\n        self.count = int(t[0])\n        self.total = t[1]\n\n    @property\n    def median(self):\n        d = torch.tensor(list(self.deque))\n        return d.median().item()\n\n    @property\n    def avg(self):\n        d = torch.tensor(list(self.deque), dtype=torch.float32)\n        return d.mean().item()\n\n    @property\n    def global_avg(self):\n        return self.total / self.count\n\n    @property\n    def max(self):\n        return max(self.deque)\n\n    @property\n    def value(self):\n        return self.deque[-1]\n\n    def __str__(self):\n        return self.fmt.format(\n            median=self.median,\n            avg=self.avg,\n            global_avg=self.global_avg,\n            max=self.max,\n            value=self.value)\n\n\nclass ConfusionMatrix(object):\n    def __init__(self, 
num_classes):\n        self.num_classes = num_classes\n        self.mat = None\n\n    def update(self, a, b):\n        n = self.num_classes\n        if self.mat is None:\n            # 创建混淆矩阵\n            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)\n        with torch.no_grad():\n            # 寻找GT中为目标的像素索引\n            k = (a >= 0) & (a < n)\n            # 统计像素真实类别a[k]被预测成类别b[k]的个数(这里的做法很巧妙)\n            inds = n * a[k].to(torch.int64) + b[k]\n            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)\n\n    def reset(self):\n        if self.mat is not None:\n            self.mat.zero_()\n\n    def compute(self):\n        h = self.mat.float()\n        # 计算全局预测准确率(混淆矩阵的对角线为预测正确的个数)\n        acc_global = torch.diag(h).sum() / h.sum()\n        # 计算每个类别的准确率\n        acc = torch.diag(h) / h.sum(1)\n        # 计算每个类别预测与真实目标的iou\n        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))\n        return acc_global, acc, iu\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.mat)\n\n    def __str__(self):\n        acc_global, acc, iu = self.compute()\n        return (\n            'global correct: {:.1f}\\n'\n            'average row correct: {}\\n'\n            'IoU: {}\\n'\n            'mean IoU: {:.1f}').format(\n                acc_global.item() * 100,\n                ['{:.1f}'.format(i) for i in (acc * 100).tolist()],\n                ['{:.1f}'.format(i) for i in (iu * 100).tolist()],\n                iu.mean().item() * 100)\n\n\nclass DiceCoefficient(object):\n    def __init__(self, num_classes: int = 2, ignore_index: int = -100):\n        self.cumulative_dice = None\n        self.num_classes = num_classes\n        self.ignore_index = ignore_index\n        self.count = None\n\n    def update(self, pred, 
target):\n        if self.cumulative_dice is None:\n            self.cumulative_dice = torch.zeros(1, dtype=pred.dtype, device=pred.device)\n        if self.count is None:\n            self.count = torch.zeros(1, dtype=pred.dtype, device=pred.device)\n        # compute the Dice score, ignoring background\n        pred = F.one_hot(pred.argmax(dim=1), self.num_classes).permute(0, 3, 1, 2).float()\n        dice_target = build_target(target, self.num_classes, self.ignore_index)\n        self.cumulative_dice += multiclass_dice_coeff(pred[:, 1:], dice_target[:, 1:], ignore_index=self.ignore_index)\n        self.count += 1\n\n    @property\n    def value(self):\n        if self.count == 0:\n            return 0\n        else:\n            return self.cumulative_dice / self.count\n\n    def reset(self):\n        if self.cumulative_dice is not None:\n            self.cumulative_dice.zero_()\n\n        if self.count is not None:\n            self.count.zeros_()\n\n    def reduce_from_all_processes(self):\n        if not torch.distributed.is_available():\n            return\n        if not torch.distributed.is_initialized():\n            return\n        torch.distributed.barrier()\n        torch.distributed.all_reduce(self.cumulative_dice)\n        torch.distributed.all_reduce(self.count)\n\n\nclass MetricLogger(object):\n    def __init__(self, delimiter=\"\\t\"):\n        self.meters = defaultdict(SmoothedValue)\n        self.delimiter = delimiter\n\n    def update(self, **kwargs):\n        for k, v in kwargs.items():\n            if isinstance(v, torch.Tensor):\n                v = v.item()\n            assert isinstance(v, (float, int))\n            self.meters[k].update(v)\n\n    def __getattr__(self, attr):\n        if attr in self.meters:\n            return self.meters[attr]\n        if attr in self.__dict__:\n            return self.__dict__[attr]\n        raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n            type(self).__name__, attr))\n\n   
 def __str__(self):\n        loss_str = []\n        for name, meter in self.meters.items():\n            loss_str.append(\n                \"{}: {}\".format(name, str(meter))\n            )\n        return self.delimiter.join(loss_str)\n\n    def synchronize_between_processes(self):\n        for meter in self.meters.values():\n            meter.synchronize_between_processes()\n\n    def add_meter(self, name, meter):\n        self.meters[name] = meter\n\n    def log_every(self, iterable, print_freq, header=None):\n        i = 0\n        if not header:\n            header = ''\n        start_time = time.time()\n        end = time.time()\n        iter_time = SmoothedValue(fmt='{avg:.4f}')\n        data_time = SmoothedValue(fmt='{avg:.4f}')\n        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'\n        if torch.cuda.is_available():\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}',\n                'max mem: {memory:.0f}'\n            ])\n        else:\n            log_msg = self.delimiter.join([\n                header,\n                '[{0' + space_fmt + '}/{1}]',\n                'eta: {eta}',\n                '{meters}',\n                'time: {time}',\n                'data: {data}'\n            ])\n        MB = 1024.0 * 1024.0\n        for obj in iterable:\n            data_time.update(time.time() - end)\n            yield obj\n            iter_time.update(time.time() - end)\n            if i % print_freq == 0:\n                eta_seconds = iter_time.global_avg * (len(iterable) - i)\n                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))\n                if torch.cuda.is_available():\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n              
          time=str(iter_time), data=str(data_time),\n                        memory=torch.cuda.max_memory_allocated() / MB))\n                else:\n                    print(log_msg.format(\n                        i, len(iterable), eta=eta_string,\n                        meters=str(self),\n                        time=str(iter_time), data=str(data_time)))\n            i += 1\n            end = time.time()\n        total_time = time.time() - start_time\n        total_time_str = str(datetime.timedelta(seconds=int(total_time)))\n        print('{} Total time: {}'.format(header, total_time_str))\n\n\ndef mkdir(path):\n    try:\n        os.makedirs(path)\n    except OSError as e:\n        if e.errno != errno.EEXIST:\n            raise\n\n\ndef setup_for_distributed(is_master):\n    \"\"\"\n    This function disables printing when not in master process\n    \"\"\"\n    import builtins as __builtin__\n    builtin_print = __builtin__.print\n\n    def print(*args, **kwargs):\n        force = kwargs.pop('force', False)\n        if is_master or force:\n            builtin_print(*args, **kwargs)\n\n    __builtin__.print = print\n\n\ndef is_dist_avail_and_initialized():\n    if not dist.is_available():\n        return False\n    if not dist.is_initialized():\n        return False\n    return True\n\n\ndef get_world_size():\n    if not is_dist_avail_and_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank():\n    if not is_dist_avail_and_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef is_main_process():\n    return get_rank() == 0\n\n\ndef save_on_master(*args, **kwargs):\n    if is_main_process():\n        torch.save(*args, **kwargs)\n\n\ndef init_distributed_mode(args):\n    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:\n        args.rank = int(os.environ[\"RANK\"])\n        args.world_size = int(os.environ['WORLD_SIZE'])\n        args.gpu = int(os.environ['LOCAL_RANK'])\n    elif 'SLURM_PROCID' in os.environ:\n 
       args.rank = int(os.environ['SLURM_PROCID'])\n        args.gpu = args.rank % torch.cuda.device_count()\n    elif hasattr(args, \"rank\"):\n        pass\n    else:\n        print('Not using distributed mode')\n        args.distributed = False\n        return\n\n    args.distributed = True\n\n    torch.cuda.set_device(args.gpu)\n    args.dist_backend = 'nccl'\n    print('| distributed init (rank {}): {}'.format(\n        args.rank, args.dist_url), flush=True)\n    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n                                         world_size=args.world_size, rank=args.rank)\n    setup_for_distributed(args.rank == 0)\n"
  },
  {
    "path": "pytorch_segmentation/unet/train_utils/train_and_eval.py",
    "content": "import torch\nfrom torch import nn\nimport train_utils.distributed_utils as utils\nfrom .dice_coefficient_loss import dice_loss, build_target\n\n\ndef criterion(inputs, target, loss_weight=None, num_classes: int = 2, dice: bool = True, ignore_index: int = -100):\n    losses = {}\n    for name, x in inputs.items():\n        # 忽略target中值为255的像素，255的像素是目标边缘或者padding填充\n        loss = nn.functional.cross_entropy(x, target, ignore_index=ignore_index, weight=loss_weight)\n        if dice is True:\n            dice_target = build_target(target, num_classes, ignore_index)\n            loss += dice_loss(x, dice_target, multiclass=True, ignore_index=ignore_index)\n        losses[name] = loss\n\n    if len(losses) == 1:\n        return losses['out']\n\n    return losses['out'] + 0.5 * losses['aux']\n\n\ndef evaluate(model, data_loader, device, num_classes):\n    model.eval()\n    confmat = utils.ConfusionMatrix(num_classes)\n    dice = utils.DiceCoefficient(num_classes=num_classes, ignore_index=255)\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    header = 'Test:'\n    with torch.no_grad():\n        for image, target in metric_logger.log_every(data_loader, 100, header):\n            image, target = image.to(device), target.to(device)\n            output = model(image)\n            output = output['out']\n\n            confmat.update(target.flatten(), output.argmax(1).flatten())\n            dice.update(output, target)\n\n        confmat.reduce_from_all_processes()\n        dice.reduce_from_all_processes()\n\n    return confmat, dice.value.item()\n\n\ndef train_one_epoch(model, optimizer, data_loader, device, epoch, num_classes,\n                    lr_scheduler, print_freq=10, scaler=None):\n    model.train()\n    metric_logger = utils.MetricLogger(delimiter=\"  \")\n    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))\n    header = 'Epoch: [{}]'.format(epoch)\n\n    if num_classes == 2:\n        # 
设置cross_entropy中背景和前景的loss权重(根据自己的数据集进行设置)\n        loss_weight = torch.as_tensor([1.0, 2.0], device=device)\n    else:\n        loss_weight = None\n\n    for image, target in metric_logger.log_every(data_loader, print_freq, header):\n        image, target = image.to(device), target.to(device)\n        with torch.cuda.amp.autocast(enabled=scaler is not None):\n            output = model(image)\n            loss = criterion(output, target, loss_weight, num_classes=num_classes, ignore_index=255)\n\n        optimizer.zero_grad()\n        if scaler is not None:\n            scaler.scale(loss).backward()\n            scaler.step(optimizer)\n            scaler.update()\n        else:\n            loss.backward()\n            optimizer.step()\n\n        lr_scheduler.step()\n\n        lr = optimizer.param_groups[0][\"lr\"]\n        metric_logger.update(loss=loss.item(), lr=lr)\n\n    return metric_logger.meters[\"loss\"].global_avg, lr\n\n\ndef create_lr_scheduler(optimizer,\n                        num_step: int,\n                        epochs: int,\n                        warmup=True,\n                        warmup_epochs=1,\n                        warmup_factor=1e-3):\n    assert num_step > 0 and epochs > 0\n    if warmup is False:\n        warmup_epochs = 0\n\n    def f(x):\n        \"\"\"\n        根据step数返回一个学习率倍率因子，\n        注意在训练开始之前，pytorch会提前调用一次lr_scheduler.step()方法\n        \"\"\"\n        if warmup is True and x <= (warmup_epochs * num_step):\n            alpha = float(x) / (warmup_epochs * num_step)\n            # warmup过程中lr倍率因子从warmup_factor -> 1\n            return warmup_factor * (1 - alpha) + alpha\n        else:\n            # warmup后lr倍率因子从1 -> 0\n            # 参考deeplab_v2: Learning rate policy\n            return (1 - (x - warmup_epochs * num_step) / ((epochs - warmup_epochs) * num_step)) ** 0.9\n\n    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)\n"
  },
  {
    "path": "pytorch_segmentation/unet/transforms.py",
    "content": "import numpy as np\nimport random\n\nimport torch\nfrom torchvision import transforms as T\nfrom torchvision.transforms import functional as F\n\n\ndef pad_if_smaller(img, size, fill=0):\n    # 如果图像最小边长小于给定size，则用数值fill进行padding\n    min_size = min(img.size)\n    if min_size < size:\n        ow, oh = img.size\n        padh = size - oh if oh < size else 0\n        padw = size - ow if ow < size else 0\n        img = F.pad(img, (0, 0, padw, padh), fill=fill)\n    return img\n\n\nclass Compose(object):\n    def __init__(self, transforms):\n        self.transforms = transforms\n\n    def __call__(self, image, target):\n        for t in self.transforms:\n            image, target = t(image, target)\n        return image, target\n\n\nclass RandomResize(object):\n    def __init__(self, min_size, max_size=None):\n        self.min_size = min_size\n        if max_size is None:\n            max_size = min_size\n        self.max_size = max_size\n\n    def __call__(self, image, target):\n        size = random.randint(self.min_size, self.max_size)\n        # 这里size传入的是int类型，所以是将图像的最小边长缩放到size大小\n        image = F.resize(image, size)\n        # 这里的interpolation注意下，在torchvision(0.9.0)以后才有InterpolationMode.NEAREST\n        # 如果是之前的版本需要使用PIL.Image.NEAREST\n        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)\n        return image, target\n\n\nclass RandomHorizontalFlip(object):\n    def __init__(self, flip_prob):\n        self.flip_prob = flip_prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.hflip(image)\n            target = F.hflip(target)\n        return image, target\n\n\nclass RandomVerticalFlip(object):\n    def __init__(self, flip_prob):\n        self.flip_prob = flip_prob\n\n    def __call__(self, image, target):\n        if random.random() < self.flip_prob:\n            image = F.vflip(image)\n            target = F.vflip(target)\n        return image, 
target\n\n\nclass RandomCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = pad_if_smaller(image, self.size)\n        target = pad_if_smaller(target, self.size, fill=255)\n        crop_params = T.RandomCrop.get_params(image, (self.size, self.size))\n        image = F.crop(image, *crop_params)\n        target = F.crop(target, *crop_params)\n        return image, target\n\n\nclass CenterCrop(object):\n    def __init__(self, size):\n        self.size = size\n\n    def __call__(self, image, target):\n        image = F.center_crop(image, self.size)\n        target = F.center_crop(target, self.size)\n        return image, target\n\n\nclass ToTensor(object):\n    def __call__(self, image, target):\n        image = F.to_tensor(image)\n        target = torch.as_tensor(np.array(target), dtype=torch.int64)\n        return image, target\n\n\nclass Normalize(object):\n    def __init__(self, mean, std):\n        self.mean = mean\n        self.std = std\n\n    def __call__(self, image, target):\n        image = F.normalize(image, mean=self.mean, std=self.std)\n        return image, target\n"
  },
  {
    "path": "summary_problem.md",
    "content": "## Tensorflow2.1 GPU安装与Pytorch1.3 GPU安装\n参考我之前写的博文：[Centos7 安装Tensorflow2.1 GPU以及Pytorch1.3 GPU（CUDA10.1）](https://blog.csdn.net/qq_37541097/article/details/103933366)\n\n\n## keras functional api训练的模型权重与subclassed训练的模型权重能否混用 [tensorflow2.0.0]\n强烈不建议混用，即使两个模型的名称结构完全一致也不要混用，里面有坑，用什么方法训练的模型就载入相应的模型权重\n\n\n## 使用subclassed模型时无法使用model.summary() [tensorflow2.0.0]\nsubclassed模型在实例化时没有自动进行build操作（只有在开始训练时，才会自动进行build），如果需要使用summary操作，需要提前手动build  \nmodel.build((batch_size, height, width, channel))\n\n\n## 无法使用keras的plot_model(model, 'my_model.png')问题 [tensorflow2.0.0]\n#### 在linux下你需要安装一些包：\n* pip install pydot==1.2.3\n* sudo apt-get install graphviz   \n#### 在windows中，同样需要安装一些包（windows比较麻烦）：\n* pip install pydot==1.2.3\n* 安装graphviz，并添加相关环境变量  \n参考连接：https://github.com/XifengGuo/CapsNet-Keras/issues/7\n\n## 为什么每计算一个batch，就需要调用一次optimizer.zero_grad() [Pytorch1.3]   \n如果不清除历史梯度，就会对计算的历史梯度进行累加（通过这个特性你能够变相实现一个很大batch数值的训练）   \n参考链接：https://www.zhihu.com/question/303070254    \n\n## Pytorch1.3 ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3]  \npillow版本过高导致，安装版本号小于7.0.0即可"
  },
  {
    "path": "tensorflow_classification/ConfusionMatrix/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "tensorflow_classification/ConfusionMatrix/main.py",
    "content": "import os\nimport math\nimport json\nimport glob\n\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\nimport numpy as np\nfrom tqdm import tqdm\nfrom prettytable import PrettyTable\n\nfrom model import MobileNetV2\n\n\nclass ConfusionMatrix(object):\n    \"\"\"\n    注意，如果显示的图像不全，是matplotlib版本问题\n    本例程使用matplotlib-3.2.1(windows and ubuntu)绘制正常\n    需要额外安装prettytable库\n    \"\"\"\n    def __init__(self, num_classes: int, labels: list):\n        self.matrix = np.zeros((num_classes, num_classes))\n        self.num_classes = num_classes\n        self.labels = labels\n\n    def update(self, preds, labels):\n        for p, t in zip(preds, labels):\n            self.matrix[p, t] += 1\n\n    def summary(self):\n        # calculate accuracy\n        sum_TP = 0\n        for i in range(self.num_classes):\n            sum_TP += self.matrix[i, i]\n        acc = sum_TP / np.sum(self.matrix)\n        print(\"the model accuracy is \", acc)\n\n        # precision, recall, specificity\n        table = PrettyTable()\n        table.field_names = [\"\", \"Precision\", \"Recall\", \"Specificity\"]\n        for i in range(self.num_classes):\n            TP = self.matrix[i, i]\n            FP = np.sum(self.matrix[i, :]) - TP\n            FN = np.sum(self.matrix[:, i]) - TP\n            TN = np.sum(self.matrix) - TP - FP - FN\n            Precision = round(TP / (TP + FP), 3) if TP + FP != 0 else 0.\n            Recall = round(TP / (TP + FN), 3) if TP + FN != 0 else 0.\n            Specificity = round(TN / (TN + FP), 3) if TN + FP != 0 else 0.\n            table.add_row([self.labels[i], Precision, Recall, Specificity])\n        print(table)\n\n    def plot(self):\n        matrix = self.matrix\n        print(matrix)\n        plt.imshow(matrix, cmap=plt.cm.Blues)\n\n        # 设置x轴坐标label\n        plt.xticks(range(self.num_classes), self.labels, rotation=45)\n        # 设置y轴坐标label\n        
plt.yticks(range(self.num_classes), self.labels)\n        # 显示colorbar\n        plt.colorbar()\n        plt.xlabel('True Labels')\n        plt.ylabel('Predicted Labels')\n        plt.title('Confusion matrix')\n\n        # 在图中标注数量/概率信息\n        thresh = matrix.max() / 2\n        for x in range(self.num_classes):\n            for y in range(self.num_classes):\n                # 注意这里的matrix[y, x]不是matrix[x, y]\n                info = int(matrix[y, x])\n                plt.text(x, y, info,\n                         verticalalignment='center',\n                         horizontalalignment='center',\n                         color=\"white\" if info > thresh else \"black\")\n        plt.tight_layout()\n        plt.show()\n\n\nif __name__ == '__main__':\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n\n\n    def pre_function(img):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img / 255.\n        img = (img - 0.5) * 2.0\n        return img\n\n\n    # data generator with data augmentation\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    # img, _ = next(train_data_gen)\n    
total_val = val_data_gen.n\n\n    model = MobileNetV2(num_classes=5)\n    # feature.build((None, 224, 224, 3))  # when using subclass model\n    pre_weights_path = './myMobileNet.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path)\n\n    # read class_indict\n    label_path = './class_indices.json'\n    assert os.path.exists(label_path), \"cannot find {}\".format(label_path)\n    json_file = open(label_path, 'r')\n    class_indict = json.load(json_file)\n\n    labels = [label for _, label in class_indict.items()]\n    confusion = ConfusionMatrix(num_classes=5, labels=labels)\n\n    # validate\n    for step in tqdm(range(math.ceil(total_val / batch_size))):\n        val_images, val_labels = next(val_data_gen)\n        results = model.predict_on_batch(val_images)\n        results = tf.keras.layers.Softmax()(results).numpy()\n        results = np.argmax(results, axis=-1)\n        labels = np.argmax(val_labels, axis=-1)\n        confusion.update(results, labels)\n    confusion.plot()\n    confusion.summary()\n"
  },
  {
    "path": "tensorflow_classification/ConfusionMatrix/model.py",
    "content": "from tensorflow.keras import layers, Model, Sequential\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(layers.Layer):\n    def __init__(self, out_channel, kernel_size=3, stride=1, **kwargs):\n        super(ConvBNReLU, self).__init__(**kwargs)\n        self.conv = layers.Conv2D(filters=out_channel, kernel_size=kernel_size,\n                                  strides=stride, padding='SAME', use_bias=False, name='Conv2d')\n        self.bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='BatchNorm')\n        self.activation = layers.ReLU(max_value=6.0)\n\n    def call(self, inputs, training=False, **kwargs):\n        x = self.conv(inputs)\n        x = self.bn(x, training=training)\n        x = self.activation(x)\n        return x\n\n\nclass InvertedResidual(layers.Layer):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio, **kwargs):\n        super(InvertedResidual, self).__init__(**kwargs)\n        self.hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n\n        layer_list = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layer_list.append(ConvBNReLU(out_channel=self.hidden_channel, kernel_size=1, name='expand'))\n        layer_list.extend([\n            # 3x3 depthwise conv\n            layers.DepthwiseConv2D(kernel_size=3, padding='SAME', strides=stride,\n             
                      use_bias=False, name='depthwise'),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='depthwise/BatchNorm'),\n            layers.ReLU(max_value=6.0),\n            # 1x1 pointwise conv(linear)\n            layers.Conv2D(filters=out_channel, kernel_size=1, strides=1,\n                          padding='SAME', use_bias=False, name='project'),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='project/BatchNorm')\n        ])\n        self.main_branch = Sequential(layer_list, name='expanded_conv')\n\n    def call(self, inputs, **kwargs):\n        if self.use_shortcut:\n            return inputs + self.main_branch(inputs)\n        else:\n            return self.main_branch(inputs)\n\n\ndef MobileNetV2(im_height=224, im_width=224, num_classes=1000, alpha=1.0, round_nearest=8):\n    block = InvertedResidual\n    input_channel = _make_divisible(32 * alpha, round_nearest)\n    last_channel = _make_divisible(1280 * alpha, round_nearest)\n    inverted_residual_setting = [\n        # t, c, n, s\n        [1, 16, 1, 1],\n        [6, 24, 2, 2],\n        [6, 32, 3, 2],\n        [6, 64, 4, 2],\n        [6, 96, 3, 1],\n        [6, 160, 3, 2],\n        [6, 320, 1, 1],\n    ]\n\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype='float32')\n    # conv1\n    x = ConvBNReLU(input_channel, stride=2, name='Conv')(input_image)\n    # building inverted residual residual blockes\n    for t, c, n, s in inverted_residual_setting:\n        output_channel = _make_divisible(c * alpha, round_nearest)\n        for i in range(n):\n            stride = s if i == 0 else 1\n            x = block(x.shape[-1], output_channel, stride, expand_ratio=t)(x)\n    # building last several layers\n    x = ConvBNReLU(last_channel, kernel_size=1, name='Conv_1')(x)\n\n    # building classifier\n    x = layers.GlobalAveragePooling2D()(x)  # pool + flatten\n    x = layers.Dropout(0.2)(x)\n    output = layers.Dense(num_classes, 
name='Logits')(x)\n\n    model = Model(inputs=input_image, outputs=output)\n    return model\n"
  },
  {
    "path": "tensorflow_classification/ConvNeXt/model.py",
    "content": "import numpy as np\nimport tensorflow as tf\nfrom tensorflow.keras import layers, initializers, Model\n\nKERNEL_INITIALIZER = {\n    \"class_name\": \"TruncatedNormal\",\n    \"config\": {\n        \"stddev\": 0.2\n    }\n}\n\nBIAS_INITIALIZER = \"Zeros\"\n\n\nclass Block(layers.Layer):\n    \"\"\"\n    Args:\n        dim (int): Number of input channels.\n        drop_rate (float): Stochastic depth rate. Default: 0.0\n        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.\n    \"\"\"\n    def __init__(self, dim, drop_rate=0., layer_scale_init_value=1e-6, name: str = None):\n        super().__init__(name=name)\n        self.layer_scale_init_value = layer_scale_init_value\n        self.dwconv = layers.DepthwiseConv2D(7,\n                                             padding=\"same\",\n                                             depthwise_initializer=KERNEL_INITIALIZER,\n                                             bias_initializer=BIAS_INITIALIZER,\n                                             name=\"dwconv\")\n        self.norm = layers.LayerNormalization(epsilon=1e-6, name=\"norm\")\n        self.pwconv1 = layers.Dense(4 * dim,\n                                    kernel_initializer=KERNEL_INITIALIZER,\n                                    bias_initializer=BIAS_INITIALIZER,\n                                    name=\"pwconv1\")\n        self.act = layers.Activation(\"gelu\")\n        self.pwconv2 = layers.Dense(dim,\n                                    kernel_initializer=KERNEL_INITIALIZER,\n                                    bias_initializer=BIAS_INITIALIZER,\n                                    name=\"pwconv2\")\n        self.drop_path = layers.Dropout(drop_rate, noise_shape=(None, 1, 1, 1)) if drop_rate > 0 else None\n\n    def build(self, input_shape):\n        if self.layer_scale_init_value > 0:\n            self.gamma = self.add_weight(shape=[input_shape[-1]],\n                                         
initializer=initializers.Constant(self.layer_scale_init_value),\n                                         trainable=True,\n                                         dtype=tf.float32,\n                                         name=\"gamma\")\n        else:\n            self.gamma = None\n\n    def call(self, x, training=False):\n        shortcut = x\n        x = self.dwconv(x)\n        x = self.norm(x, training=training)\n        x = self.pwconv1(x)\n        x = self.act(x)\n        x = self.pwconv2(x)\n\n        if self.gamma is not None:\n            x = self.gamma * x\n\n        if self.drop_path is not None:\n            x = self.drop_path(x, training=training)\n\n        return shortcut + x\n\n\nclass Stem(layers.Layer):\n    def __init__(self, dim, name: str = None):\n        super().__init__(name=name)\n        self.conv = layers.Conv2D(dim,\n                                  kernel_size=4,\n                                  strides=4,\n                                  padding=\"same\",\n                                  kernel_initializer=KERNEL_INITIALIZER,\n                                  bias_initializer=BIAS_INITIALIZER,\n                                  name=\"conv2d\")\n        self.norm = layers.LayerNormalization(epsilon=1e-6, name=\"norm\")\n\n    def call(self, x, training=False):\n        x = self.conv(x)\n        x = self.norm(x, training=training)\n        return x\n\n\nclass DownSample(layers.Layer):\n    def __init__(self, dim, name: str = None):\n        super().__init__(name=name)\n        self.norm = layers.LayerNormalization(epsilon=1e-6, name=\"norm\")\n        self.conv = layers.Conv2D(dim,\n                                  kernel_size=2,\n                                  strides=2,\n                                  padding=\"same\",\n                                  kernel_initializer=KERNEL_INITIALIZER,\n                                  bias_initializer=BIAS_INITIALIZER,\n                                  name=\"conv2d\")\n\n   
 def call(self, x, training=False):\n        x = self.norm(x, training=training)\n        x = self.conv(x)\n        return x\n\n\nclass ConvNeXt(Model):\n    r\"\"\" ConvNeXt\n        A Tensorflow impl of : `A ConvNet for the 2020s`  -\n          https://arxiv.org/pdf/2201.03545.pdf\n    Args:\n        num_classes (int): Number of classes for classification head. Default: 1000\n        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]\n        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]\n        drop_path_rate (float): Stochastic depth rate. Default: 0.\n        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.\n    \"\"\"\n    def __init__(self, num_classes: int, depths: list, dims: list, drop_path_rate: float = 0.,\n                 layer_scale_init_value: float = 1e-6):\n        super().__init__()\n        self.stem = Stem(dims[0], name=\"stem\")\n\n        cur = 0\n        dp_rates = np.linspace(start=0, stop=drop_path_rate, num=sum(depths))\n        self.stage1 = [Block(dim=dims[0],\n                             drop_rate=dp_rates[cur + i],\n                             layer_scale_init_value=layer_scale_init_value,\n                             name=f\"stage1_block{i}\")\n                       for i in range(depths[0])]\n        cur += depths[0]\n\n        self.downsample2 = DownSample(dims[1], name=\"downsample2\")\n        self.stage2 = [Block(dim=dims[1],\n                             drop_rate=dp_rates[cur + i],\n                             layer_scale_init_value=layer_scale_init_value,\n                             name=f\"stage2_block{i}\")\n                       for i in range(depths[1])]\n        cur += depths[1]\n\n        self.downsample3 = DownSample(dims[2], name=\"downsample3\")\n        self.stage3 = [Block(dim=dims[2],\n                             drop_rate=dp_rates[cur + i],\n                             layer_scale_init_value=layer_scale_init_value,\n          
                   name=f\"stage3_block{i}\")\n                       for i in range(depths[2])]\n        cur += depths[2]\n\n        self.downsample4 = DownSample(dims[3], name=\"downsample4\")\n        self.stage4 = [Block(dim=dims[3],\n                             drop_rate=dp_rates[cur + i],\n                             layer_scale_init_value=layer_scale_init_value,\n                             name=f\"stage4_block{i}\")\n                       for i in range(depths[3])]\n\n        self.norm = layers.LayerNormalization(epsilon=1e-6, name=\"norm\")\n        self.head = layers.Dense(units=num_classes,\n                                 kernel_initializer=KERNEL_INITIALIZER,\n                                 bias_initializer=BIAS_INITIALIZER,\n                                 name=\"head\")\n\n    def call(self, x, training=False):\n        x = self.stem(x, training=training)\n        for block in self.stage1:\n            x = block(x, training=training)\n\n        x = self.downsample2(x, training=training)\n        for block in self.stage2:\n            x = block(x, training=training)\n\n        x = self.downsample3(x, training=training)\n        for block in self.stage3:\n            x = block(x, training=training)\n\n        x = self.downsample4(x, training=training)\n        for block in self.stage4:\n            x = block(x, training=training)\n\n        x = tf.reduce_mean(x, axis=[1, 2])\n        x = self.norm(x, training=training)\n        x = self.head(x)\n        return x\n\n\ndef convnext_tiny(num_classes: int):\n    model = ConvNeXt(depths=[3, 3, 9, 3],\n                     dims=[96, 192, 384, 768],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_small(num_classes: int):\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[96, 192, 384, 768],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_base(num_classes: int):\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n       
              dims=[128, 256, 512, 1024],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_large(num_classes: int):\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[192, 384, 768, 1536],\n                     num_classes=num_classes)\n    return model\n\n\ndef convnext_xlarge(num_classes: int):\n    model = ConvNeXt(depths=[3, 3, 27, 3],\n                     dims=[256, 512, 1024, 2048],\n                     num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "tensorflow_classification/ConvNeXt/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\nfrom model import convnext_tiny as create_model\n\n\ndef main():\n    num_classes = 5\n    im_height = im_width = 224\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # read image\n    img = np.array(img).astype(np.float32)\n\n    # preprocess\n    img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n    model.build([1, 224, 224, 3])\n\n    weights_path = './save_weights/model.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img, batch_size=1))\n    result = tf.keras.layers.Softmax()(result)\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/ConvNeXt/train.py",
    "content": "import os\nimport re\nimport sys\nimport datetime\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model import convnext_tiny as create_model\nfrom utils import generate_ds, cosine_scheduler\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    batch_size = 8\n    epochs = 10\n    num_classes = 5\n    freeze_layers = False\n    initial_lr = 0.005\n    weight_decay = 5e-4\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n    model.build((1, 224, 224, 3))\n\n    # 下载我提前转好的预训练权重\n    # 链接: https://pan.baidu.com/s/1MtYJ3FCAkiPwaMRKuyZN1Q  密码: 1cgp\n    # load weights\n    pre_weights_path = './convnext_tiny_1k_224.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    # freeze bottom layers\n    if freeze_layers:\n        for layer in model.layers:\n            if \"head\" not in layer.name:\n                layer.trainable = False\n            else:\n                print(\"training {}\".format(layer.name))\n\n    model.summary()\n\n    # custom learning rate scheduler\n    scheduler = cosine_scheduler(initial_lr, epochs, len(train_ds), train_writer=train_writer)\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n    optimizer = 
tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            ce_loss = loss_object(train_labels, output)\n\n            # l2 loss\n            matcher = re.compile(\".*(bias|gamma|beta).*\")\n            l2loss = weight_decay * tf.add_n([\n                tf.nn.l2_loss(v)\n                for v in model.trainable_variables\n                if not matcher.match(v.name)\n            ])\n\n            loss = ce_loss + l2loss\n\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n        train_loss(ce_loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            # update learning rate\n            optimizer.learning_rate = next(scheduler)\n\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, 
acc:{:.3f}, lr:{:.5f}\".format(\n                epoch + 1,\n                epochs,\n                train_loss.result(),\n                train_accuracy.result(),\n                optimizer.learning_rate.numpy()\n            )\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            save_name = \"./save_weights/model.ckpt\"\n            model.save_weights(save_name, save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/ConvNeXt/trans_weights.py",
    "content": "import torch\nfrom model import *\n\n\ndef transpose_weights(m_type, w_dict, k, v):\n    if m_type == \"conv\":\n        if len(v.shape) > 1:\n            # conv weights\n            v = np.transpose(v.numpy(), (2, 3, 1, 0)).astype(np.float32)\n        w_dict[k] = v\n    elif m_type == \"dwconv\":\n        if len(v.shape) > 1:\n            # dwconv weights\n            v = np.transpose(v.numpy(), (2, 3, 0, 1)).astype(np.float32)\n        w_dict[k] = v\n    elif m_type == \"linear\":\n        if len(v.shape) > 1:\n            v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)\n        w_dict[k] = v\n    elif m_type == \"norm\":\n        w_dict[k] = v\n    else:\n        ValueError(f\"not support type:{m_type}\")\n\n\ndef main(weights_path: str,\n         model_name: str,\n         model: tf.keras.Model):\n    var_dict = {v.name.split(':')[0]: v for v in model.weights}\n\n    weights_dict = torch.load(weights_path, map_location=\"cpu\")[\"model\"]\n    w_dict = {}\n    for k, v in weights_dict.items():\n        if \"downsample_layers\" in k:\n            split_k = k.split(\".\")\n            if split_k[1] == \"0\":\n                if split_k[2] == \"0\":\n                    k = \"stem/conv2d/\" + split_k[-1]\n                    k = k.replace(\"weight\", \"kernel\")\n                    transpose_weights(\"conv\", w_dict, k, v)\n                else:\n                    k = \"stem/norm/\" + split_k[-1]\n                    k = k.replace(\"weight\", \"gamma\")\n                    k = k.replace(\"bias\", \"beta\")\n                    transpose_weights(\"norm\", w_dict, k, v)\n            else:\n                stage = int(split_k[1]) + 1\n                if split_k[2] == \"1\":\n                    k = f\"downsample{stage}/conv2d/\" + split_k[-1]\n                    k = k.replace(\"weight\", \"kernel\")\n                    transpose_weights(\"conv\", w_dict, k, v)\n                else:\n                    k = f\"downsample{stage}/norm/\" + 
split_k[-1]\n                    k = k.replace(\"weight\", \"gamma\")\n                    k = k.replace(\"bias\", \"beta\")\n                    transpose_weights(\"norm\", w_dict, k, v)\n        elif \"stages\" in k:\n            split_k = k.split(\".\")\n            stage = int(split_k[1]) + 1\n            block = int(split_k[2])\n            if \"dwconv\" in k:\n                k = f\"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}\"\n                k = k.replace(\"weight\", \"depthwise_kernel\")\n                transpose_weights(\"dwconv\", w_dict, k, v)\n            elif \"pwconv\" in k:\n                k = f\"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}\"\n                k = k.replace(\"weight\", \"kernel\")\n                transpose_weights(\"linear\", w_dict, k, v)\n            elif \"norm\" in k:\n                k = f\"stage{stage}_block{block}/{split_k[-2]}/{split_k[-1]}\"\n                k = k.replace(\"weight\", \"gamma\")\n                k = k.replace(\"bias\", \"beta\")\n                transpose_weights(\"norm\", w_dict, k, v)\n            elif \"gamma\" in k:\n                k = f\"stage{stage}_block{block}/{split_k[-1]}\"\n                transpose_weights(\"norm\", w_dict, k, v)\n            else:\n                ValueError(f\"unrecognized {k}\")\n        elif \"norm\" in k:\n            split_k = k.split(\".\")\n            k = f\"norm/{split_k[-1]}\"\n            k = k.replace(\"weight\", \"gamma\")\n            k = k.replace(\"bias\", \"beta\")\n            transpose_weights(\"norm\", w_dict, k, v)\n        elif \"head\" in k:\n            split_k = k.split(\".\")\n            k = f\"head/{split_k[-1]}\"\n            k = k.replace(\"weight\", \"kernel\")\n            transpose_weights(\"linear\", w_dict, k, v)\n        else:\n            ValueError(f\"unrecognized {k}\")\n\n    for key, var in var_dict.items():\n        if key in w_dict:\n            if w_dict[key].shape != var.shape:\n                msg = \"shape 
mismatch: {}\".format(key)\n                print(msg)\n            else:\n                var.assign(w_dict[key], read_value=False)\n        else:\n            msg = \"Not found {} in {}\".format(key, weights_path)\n            print(msg)\n\n    model.save_weights(\"./{}.h5\".format(model_name))\n\n\nif __name__ == '__main__':\n    model = convnext_tiny(num_classes=1000)\n    model.build((1, 224, 224, 3))\n    # https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth\n    main(weights_path=\"./convnext_tiny_1k_224_ema.pth\",\n         model_name=\"convnext_tiny_1k_224\",\n         model=model)\n\n    # model = convnext_small(num_classes=1000)\n    # model.build((1, 224, 224, 3))\n    # # https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth\n    # main(weights_path=\"./convnext_small_1k_224_ema.pth\",\n    #      model_name=\"convnext_small_1k_224\",\n    #      model=model)\n\n    # model = convnext_base(num_classes=1000)\n    # model.build((1, 224, 224, 3))\n    # # https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth\n    # main(weights_path=\"./convnext_base_1k_224_ema.pth\",\n    #      model_name=\"convnext_base_1k_224\",\n    #      model=model)\n\n    # model = convnext_base(num_classes=21841)\n    # model.build((1, 224, 224, 3))\n    # # https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth\n    # main(weights_path=\"./convnext_base_22k_224.pth\",\n    #      model_name=\"convnext_base_22k_224\",\n    #      model=model)\n\n    # model = convnext_large(num_classes=1000)\n    # model.build((1, 224, 224, 3))\n    # # https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth\n    # main(weights_path=\"./convnext_large_1k_224_ema.pth\",\n    #      model_name=\"convnext_large_1k_224\",\n    #      model=model)\n\n    # model = convnext_large(num_classes=21841)\n    # model.build((1, 224, 224, 3))\n    # # https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth\n    # 
main(weights_path=\"./convnext_large_22k_224.pth\",\n    #      model_name=\"convnext_large_22k_224\",\n    #      model=model)\n\n"
  },
  {
    "path": "tensorflow_classification/ConvNeXt/utils.py",
    "content": "import os\nimport json\nimport random\nimport math\n\nimport numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for 
validation\".format(sum(every_class_num),\n                                                                                            len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                train_im_height: int = 224,\n                train_im_width: int = 224,\n                val_im_height: int = None,\n                val_im_width: int = None,\n                batch_size: int = 8,\n                val_rate: float = 0.1,\n                cache_data: bool = False):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param train_im_height: 训练输入网络图像的高度\n    :param train_im_width:  训练输入网络图像的宽度\n    :param val_im_height: 验证输入网络图像的高度\n    :param val_im_width:  验证输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :param cache_data: 是否缓存数据\n    :return:\n    \"\"\"\n    assert train_im_height is not None\n    assert train_im_width is not None\n    if val_im_width is None:\n        val_im_width = train_im_width\n    if val_im_height is None:\n        val_im_height = train_im_height\n\n    train_img_path, train_img_label, val_img_path, 
val_img_label = read_split_data(data_root, val_rate=val_rate)\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, train_im_height, train_im_width)\n        image = tf.image.random_flip_left_right(image)\n        image = (image / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, val_im_height, val_im_width)\n        image = (image / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False,\n                                  cache: bool = False):\n        if cache:\n            ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data)\n\n    val_ds = 
tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val, cache=False)\n\n    return train_ds, val_ds\n\n\ndef cosine_rate(now_step, total_step, end_lr_rate):\n    rate = ((1 + math.cos(now_step * math.pi / total_step)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine\n    return rate\n\n\ndef cosine_scheduler(initial_lr, epochs, steps, warmup_epochs=1, end_lr_rate=1e-6, train_writer=None):\n    \"\"\"custom learning rate scheduler\"\"\"\n    assert warmup_epochs < epochs\n    warmup = np.linspace(start=1e-8, stop=initial_lr, num=warmup_epochs*steps)\n    remainder_steps = (epochs - warmup_epochs) * steps\n    cosine = initial_lr * np.array([cosine_rate(i, remainder_steps, end_lr_rate) for i in range(remainder_steps)])\n    lr_list = np.concatenate([warmup, cosine])\n\n    for i in range(len(lr_list)):\n        new_lr = lr_list[i]\n        if train_writer is not None:\n            # writing lr into tensorboard\n            with train_writer.as_default():\n                tf.summary.scalar('learning rate', data=new_lr, step=i)\n        yield new_lr\n"
  },
  {
    "path": "tensorflow_classification/README.md",
    "content": "## 该文件夹存放使用tensorflow实现的代码版本\n**model.py**： 是模型文件  \n**train.py**： 是调用模型训练的文件    \n**predict.py**： 是调用模型进行预测的文件  \n**class_indices.json**： 是训练数据集对应的标签文件   \n\n------\n若要使用该训练脚本需要下载对应的花分类数据集并将其划分为训练集和验证集。   \n[点击这里](../data_set/README.md)会告诉你如何去下载数据集，以及提供了现成的划分数据集脚本  "
  },
  {
    "path": "tensorflow_classification/Test11_efficientnetV2/model.py",
    "content": "\"\"\"\nofficial code:\nhttps://github.com/google/automl/tree/master/efficientnetv2\n\"\"\"\n\nimport itertools\n\nimport tensorflow as tf\nfrom tensorflow.keras import layers, Model, Input\n\n\nCONV_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 2.0,\n        'mode': 'fan_out',\n        'distribution': 'truncated_normal'\n    }\n}\n\nDENSE_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 1. / 3.,\n        'mode': 'fan_out',\n        'distribution': 'uniform'\n    }\n}\n\n\nclass SE(layers.Layer):\n    def __init__(self,\n                 se_filters: int,\n                 output_filters: int,\n                 name: str = None):\n        super(SE, self).__init__(name=name)\n\n        self.se_reduce = layers.Conv2D(filters=se_filters,\n                                       kernel_size=1,\n                                       strides=1,\n                                       padding=\"same\",\n                                       activation=\"swish\",\n                                       use_bias=True,\n                                       kernel_initializer=CONV_KERNEL_INITIALIZER,\n                                       name=\"conv2d\")\n\n        self.se_expand = layers.Conv2D(filters=output_filters,\n                                       kernel_size=1,\n                                       strides=1,\n                                       padding=\"same\",\n                                       activation=\"sigmoid\",\n                                       use_bias=True,\n                                       kernel_initializer=CONV_KERNEL_INITIALIZER,\n                                       name=\"conv2d_1\")\n\n    def call(self, inputs, **kwargs):\n        # Tensor: [N, H, W, C] -> [N, 1, 1, C]\n        se_tensor = tf.reduce_mean(inputs, [1, 2], keepdims=True)\n        se_tensor = self.se_reduce(se_tensor)\n        se_tensor = 
self.se_expand(se_tensor)\n        return se_tensor * inputs\n\n\nclass MBConv(layers.Layer):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float = 0.25,\n                 drop_rate: float = 0.,\n                 name: str = None):\n        super(MBConv, self).__init__(name=name)\n\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        self.has_shortcut = (stride == 1 and input_c == out_c)\n        expanded_c = input_c * expand_ratio\n\n        bid = itertools.count(0)\n        get_norm_name = lambda: 'batch_normalization' + ('' if not next(\n            bid) else '_' + str(next(bid) // 2))\n        cid = itertools.count(0)\n        get_conv_name = lambda: 'conv2d' + ('' if not next(cid) else '_' + str(\n            next(cid) // 2))\n\n        # 在EfficientNetV2中，MBConv中不存在expansion=1的情况所以conv_pw肯定存在\n        assert expand_ratio != 1\n        # Point-wise expansion\n        self.expand_conv = layers.Conv2D(\n            filters=expanded_c,\n            kernel_size=1,\n            strides=1,\n            padding=\"same\",\n            use_bias=False,\n            name=get_conv_name())\n        self.norm0 = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            name=get_norm_name())\n        self.act0 = layers.Activation(\"swish\")\n\n        # Depth-wise convolution\n        self.depthwise_conv = layers.DepthwiseConv2D(\n            kernel_size=kernel_size,\n            strides=stride,\n            depthwise_initializer=CONV_KERNEL_INITIALIZER,\n            padding=\"same\",\n            use_bias=False,\n            name=\"depthwise_conv2d\")\n        self.norm1 = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            
name=get_norm_name())\n        self.act1 = layers.Activation(\"swish\")\n\n        # SE\n        num_reduced_filters = max(1, int(input_c * se_ratio))\n        self.se = SE(num_reduced_filters, expanded_c, name=\"se\")\n\n        # Point-wise linear projection\n        self.project_conv = layers.Conv2D(\n            filters=out_c,\n            kernel_size=1,\n            strides=1,\n            kernel_initializer=CONV_KERNEL_INITIALIZER,\n            padding=\"same\",\n            use_bias=False,\n            name=get_conv_name())\n        self.norm2 = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            name=get_norm_name())\n\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            # Stochastic Depth\n            self.drop_path = layers.Dropout(rate=drop_rate,\n                                            noise_shape=(None, 1, 1, 1),  # binary dropout mask\n                                            name=\"drop_path\")\n\n    def call(self, inputs, training=None):\n        x = inputs\n\n        x = self.expand_conv(x)\n        x = self.norm0(x, training=training)\n        x = self.act0(x)\n\n        x = self.depthwise_conv(x)\n        x = self.norm1(x, training=training)\n        x = self.act1(x)\n\n        x = self.se(x)\n\n        x = self.project_conv(x)\n        x = self.norm2(x, training=training)\n\n        if self.has_shortcut:\n            if self.drop_rate > 0:\n                x = self.drop_path(x, training=training)\n\n            x = tf.add(x, inputs)\n\n        return x\n\n\nclass FusedMBConv(layers.Layer):\n    def __init__(self,\n                 kernel_size: int,\n                 input_c: int,\n                 out_c: int,\n                 expand_ratio: int,\n                 stride: int,\n                 se_ratio: float,\n                 drop_rate: float = 0.,\n                 name: str = None):\n        super(FusedMBConv, 
self).__init__(name=name)\n        if stride not in [1, 2]:\n            raise ValueError(\"illegal stride value.\")\n\n        assert se_ratio == 0.\n\n        self.has_shortcut = (stride == 1 and input_c == out_c)\n        self.has_expansion = expand_ratio != 1\n        expanded_c = input_c * expand_ratio\n\n        bid = itertools.count(0)\n        get_norm_name = lambda: 'batch_normalization' + ('' if not next(\n            bid) else '_' + str(next(bid) // 2))\n        cid = itertools.count(0)\n        get_conv_name = lambda: 'conv2d' + ('' if not next(cid) else '_' + str(\n            next(cid) // 2))\n\n        if expand_ratio != 1:\n            self.expand_conv = layers.Conv2D(\n                filters=expanded_c,\n                kernel_size=kernel_size,\n                strides=stride,\n                kernel_initializer=CONV_KERNEL_INITIALIZER,\n                padding=\"same\",\n                use_bias=False,\n                name=get_conv_name())\n            self.norm0 = layers.BatchNormalization(\n                axis=-1,\n                momentum=0.9,\n                epsilon=1e-3,\n                name=get_norm_name())\n            self.act0 = layers.Activation(\"swish\")\n\n        self.project_conv = layers.Conv2D(\n            filters=out_c,\n            kernel_size=1 if expand_ratio != 1 else kernel_size,\n            strides=1 if expand_ratio != 1 else stride,\n            kernel_initializer=CONV_KERNEL_INITIALIZER,\n            padding=\"same\",\n            use_bias=False,\n            name=get_conv_name())\n        self.norm1 = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            name=get_norm_name())\n\n        if expand_ratio == 1:\n            self.act1 = layers.Activation(\"swish\")\n\n        self.drop_rate = drop_rate\n        if self.has_shortcut and drop_rate > 0:\n            # Stochastic Depth\n            self.drop_path = layers.Dropout(rate=drop_rate,\n               
                             noise_shape=(None, 1, 1, 1),  # binary dropout mask\n                                            name=\"drop_path\")\n\n    def call(self, inputs, training=None):\n        x = inputs\n        if self.has_expansion:\n            x = self.expand_conv(x)\n            x = self.norm0(x, training=training)\n            x = self.act0(x)\n\n        x = self.project_conv(x)\n        x = self.norm1(x, training=training)\n        if self.has_expansion is False:\n            x = self.act1(x)\n\n        if self.has_shortcut:\n            if self.drop_rate > 0:\n                x = self.drop_path(x, training=training)\n\n            x = tf.add(x, inputs)\n\n        return x\n\n\nclass Stem(layers.Layer):\n    def __init__(self, filters: int, name: str = None):\n        super(Stem, self).__init__(name=name)\n        self.conv_stem = layers.Conv2D(\n            filters=filters,\n            kernel_size=3,\n            strides=2,\n            kernel_initializer=CONV_KERNEL_INITIALIZER,\n            padding=\"same\",\n            use_bias=False,\n            name=\"conv2d\")\n        self.norm = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            name=\"batch_normalization\")\n        self.act = layers.Activation(\"swish\")\n\n    def call(self, inputs, training=None):\n        x = self.conv_stem(inputs)\n        x = self.norm(x, training=training)\n        x = self.act(x)\n\n        return x\n\n\nclass Head(layers.Layer):\n    def __init__(self,\n                 filters: int = 1280,\n                 num_classes: int = 1000,\n                 drop_rate: float = 0.,\n                 name: str = None):\n        super(Head, self).__init__(name=name)\n        self.conv_head = layers.Conv2D(\n            filters=filters,\n            kernel_size=1,\n            kernel_initializer=CONV_KERNEL_INITIALIZER,\n            padding=\"same\",\n            use_bias=False,\n            
name=\"conv2d\")\n        self.norm = layers.BatchNormalization(\n            axis=-1,\n            momentum=0.9,\n            epsilon=1e-3,\n            name=\"batch_normalization\")\n        self.act = layers.Activation(\"swish\")\n\n        self.avg = layers.GlobalAveragePooling2D()\n        self.fc = layers.Dense(num_classes,\n                               kernel_initializer=DENSE_KERNEL_INITIALIZER)\n\n        if drop_rate > 0:\n            self.dropout = layers.Dropout(drop_rate)\n\n    def call(self, inputs, training=None):\n        x = self.conv_head(inputs)\n        x = self.norm(x)\n        x = self.act(x)\n        x = self.avg(x)\n\n        if self.dropout:\n            x = self.dropout(x, training=training)\n\n        x = self.fc(x)\n        return x\n\n\nclass EfficientNetV2(Model):\n    def __init__(self,\n                 model_cnf: list,\n                 num_classes: int = 1000,\n                 num_features: int = 1280,\n                 dropout_rate: float = 0.2,\n                 drop_connect_rate: float = 0.2,\n                 name: str = None):\n        super(EfficientNetV2, self).__init__(name=name)\n\n        for cnf in model_cnf:\n            assert len(cnf) == 8\n\n        stem_filter_num = model_cnf[0][4]\n        self.stem = Stem(stem_filter_num)\n\n        total_blocks = sum([i[0] for i in model_cnf])\n        block_id = 0\n        self.blocks = []\n        # Builds blocks.\n        for cnf in model_cnf:\n            repeats = cnf[0]\n            op = FusedMBConv if cnf[-2] == 0 else MBConv\n            for i in range(repeats):\n                self.blocks.append(op(kernel_size=cnf[1],\n                                      input_c=cnf[4] if i == 0 else cnf[5],\n                                      out_c=cnf[5],\n                                      expand_ratio=cnf[3],\n                                      stride=cnf[2] if i == 0 else 1,\n                                      se_ratio=cnf[-1],\n                                    
  drop_rate=drop_connect_rate * block_id / total_blocks,\n                                      name=\"blocks_{}\".format(block_id)))\n                block_id += 1\n\n        self.head = Head(num_features, num_classes, dropout_rate)\n\n    # def summary(self, input_shape=(224, 224, 3), **kwargs):\n    #     x = Input(shape=input_shape)\n    #     model = Model(inputs=[x], outputs=self.call(x, training=True))\n    #     return model.summary()\n\n    def call(self, inputs, training=None):\n        x = self.stem(inputs, training)\n\n        # call for blocks.\n        for _, block in enumerate(self.blocks):\n            x = block(x, training=training)\n\n        x = self.head(x, training=training)\n\n        return x\n\n\ndef efficientnetv2_s(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 300, eval_size: 384\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[2, 3, 1, 1, 24, 24, 0, 0],\n                    [4, 3, 2, 4, 24, 48, 0, 0],\n                    [4, 3, 2, 4, 48, 64, 0, 0],\n                    [6, 3, 2, 4, 64, 128, 1, 0.25],\n                    [9, 3, 1, 6, 128, 160, 1, 0.25],\n                    [15, 3, 2, 6, 160, 256, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.2,\n                           name=\"efficientnetv2-s\")\n    return model\n\n\ndef efficientnetv2_m(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[3, 3, 1, 1, 24, 24, 0, 0],\n                    [5, 3, 2, 4, 24, 48, 0, 0],\n                    [5, 3, 2, 4, 48, 80, 0, 0],\n                    [7, 3, 2, 4, 80, 160, 1, 0.25],\n                    [14, 3, 1, 6, 160, 
176, 1, 0.25],\n                    [18, 3, 2, 6, 176, 304, 1, 0.25],\n                    [5, 3, 1, 6, 304, 512, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.3,\n                           name=\"efficientnetv2-m\")\n    return model\n\n\ndef efficientnetv2_l(num_classes: int = 1000):\n    \"\"\"\n    EfficientNetV2\n    https://arxiv.org/abs/2104.00298\n    \"\"\"\n    # train_size: 384, eval_size: 480\n\n    # repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio\n    model_config = [[4, 3, 1, 1, 32, 32, 0, 0],\n                    [7, 3, 2, 4, 32, 64, 0, 0],\n                    [7, 3, 2, 4, 64, 96, 0, 0],\n                    [10, 3, 2, 4, 96, 192, 1, 0.25],\n                    [19, 3, 1, 6, 192, 224, 1, 0.25],\n                    [25, 3, 2, 6, 224, 384, 1, 0.25],\n                    [7, 3, 1, 6, 384, 640, 1, 0.25]]\n\n    model = EfficientNetV2(model_cnf=model_config,\n                           num_classes=num_classes,\n                           dropout_rate=0.4,\n                           name=\"efficientnetv2-l\")\n    return model\n\n\n# m = efficientnetv2_s()\n# m.summary()\n"
  },
  {
    "path": "tensorflow_classification/Test11_efficientnetV2/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\nfrom model import efficientnetv2_s as create_model\n\n\ndef main():\n    num_classes = 5\n\n    img_size = {\"s\": 384,\n                \"m\": 480,\n                \"l\": 480}\n    num_model = \"s\"\n    im_height = im_width = img_size[num_model]\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # read image\n    img = np.array(img).astype(np.float32)\n\n    # preprocess\n    img = (img / 255. - 0.5) / 0.5\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n\n    weights_path = './save_weights/efficientnetv2.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img))\n    result = tf.keras.layers.Softmax()(result)\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test11_efficientnetV2/train.py",
    "content": "import os\nimport sys\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model import efficientnetv2_s as create_model\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    img_size = {\"s\": [300, 384],  # train_size, val_size\n                \"m\": [384, 480],\n                \"l\": [384, 480]}\n    num_model = \"s\"\n\n    batch_size = 8\n    epochs = 30\n    num_classes = 5\n    freeze_layers = True\n    initial_lr = 0.01\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root,\n                                   train_im_height=img_size[num_model][0],\n                                   train_im_width=img_size[num_model][0],\n                                   val_im_height=img_size[num_model][1],\n                                   val_im_width=img_size[num_model][1],\n                                   batch_size=batch_size)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n    model.build((1, img_size[num_model][0], img_size[num_model][0], 3))\n\n    # 下载我提前转好的预训练权重\n    # 链接: https://pan.baidu.com/s/1Pr-pO5sQVySPQnBY8pQH7w  密码: f6hi\n    # load weights\n    pre_weights_path = './efficientnetv2-s.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    # freeze bottom layers\n    if freeze_layers:\n        unfreeze_layers = \"head\"\n        
for layer in model.layers:\n            if unfreeze_layers not in layer.name:\n                layer.trainable = False\n            else:\n                print(\"training {}\".format(layer.name))\n\n    model.summary()\n\n    # custom learning rate curve\n    def scheduler(now_epoch):\n        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate\n        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine\n        new_lr = rate * initial_lr\n\n        # writing lr into tensorboard\n        with train_writer.as_default():\n            tf.summary.scalar('learning rate', data=new_lr, step=epoch)\n\n        return new_lr\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            loss = loss_object(train_labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        
train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # update learning rate\n        optimizer.learning_rate = scheduler(epoch)\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n    
        save_name = \"./save_weights/efficientnetv2.ckpt\"\n            model.save_weights(save_name, save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test11_efficientnetV2/trans_weights.py",
    "content": "from model import *\n\n\ndef main(ckpt_path: str,\n         model_name: str,\n         model: tf.keras.Model):\n    var_dict = {v.name.split(':')[0]: v for v in model.weights}\n\n    reader = tf.train.load_checkpoint(ckpt_path)\n    var_shape_map = reader.get_variable_to_shape_map()\n\n    for key, var in var_dict.items():\n        key_ = model_name + \"/\" + key\n        key_ = key_.replace(\"batch_normalization\", \"tpu_batch_normalization\")\n        if key_ in var_shape_map:\n            if var_shape_map[key_] != var.shape:\n                msg = \"shape mismatch: {}\".format(key)\n                print(msg)\n            else:\n                var.assign(reader.get_tensor(key_), read_value=False)\n        else:\n            msg = \"Not found {} in {}\".format(key, ckpt_path)\n            print(msg)\n\n    model.save_weights(\"./{}.h5\".format(model_name))\n\n\nif __name__ == '__main__':\n    model = efficientnetv2_s()\n    model.build((1, 224, 224, 3))\n    main(ckpt_path=\"./efficientnetv2-s-21k-ft1k/model\",\n         model_name=\"efficientnetv2-s\",\n         model=model)\n\n    # model = efficientnetv2_m()\n    # model.build((1, 224, 224, 3))\n    # main(ckpt_path=\"./efficientnetv2-m-21k-ft1k/model\",\n    #      model_name=\"efficientnetv2-m\",\n    #      model=model)\n\n    # model = efficientnetv2_l()\n    # model.build((1, 224, 224, 3))\n    # main(ckpt_path=\"./efficientnetv2-l-21k-ft1k/model\",\n    #      model_name=\"efficientnetv2-l\",\n    #      model=model)\n"
  },
  {
    "path": "tensorflow_classification/Test11_efficientnetV2/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                train_im_height: int = None,\n                train_im_width: int = None,\n                val_im_height: int = None,\n                val_im_width: int = None,\n                batch_size: int = 8,\n                val_rate: float = 0.1,\n                cache_data: bool = False):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param train_im_height: 训练输入网络图像的高度\n    :param train_im_width:  训练输入网络图像的宽度\n    :param val_im_height: 验证输入网络图像的高度\n    :param val_im_width:  验证输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :param cache_data: 是否缓存数据\n    :return:\n    \"\"\"\n    assert train_im_height is not None\n    assert train_im_width is not None\n    if val_im_width is None:\n        val_im_width = train_im_width\n    if val_im_height is None:\n        val_im_height = train_im_height\n\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    
AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, train_im_height, train_im_width)\n        image = tf.image.random_flip_left_right(image)\n        image = (image / 255. - 0.5) / 0.5\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, val_im_height, val_im_width)\n        image = (image / 255. - 0.5) / 0.5\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False,\n                                  cache: bool = False):\n        if cache:\n            ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = 
len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val, cache=False)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/Test1_official_demo/model.py",
    "content": "from tensorflow.keras.layers import Dense, Flatten, Conv2D\nfrom tensorflow.keras import Model\n\n\nclass MyModel(Model):\n    def __init__(self):\n        super(MyModel, self).__init__()\n        self.conv1 = Conv2D(32, 3, activation='relu')\n        self.flatten = Flatten()\n        self.d1 = Dense(128, activation='relu')\n        self.d2 = Dense(10, activation='softmax')\n\n    def call(self, x, **kwargs):\n        x = self.conv1(x)      # input[batch, 28, 28, 1] output[batch, 26, 26, 32]\n        x = self.flatten(x)    # output [batch, 21632]\n        x = self.d1(x)         # output [batch, 128]\n        return self.d2(x)      # output [batch, 10]\n"
  },
  {
    "path": "tensorflow_classification/Test1_official_demo/train.py",
    "content": "from __future__ import absolute_import, division, print_function, unicode_literals\n\nimport tensorflow as tf\nfrom model import MyModel\n\n\ndef main():\n    mnist = tf.keras.datasets.mnist\n\n    # download and load data\n    (x_train, y_train), (x_test, y_test) = mnist.load_data()\n    x_train, x_test = x_train / 255.0, x_test / 255.0\n\n    # Add a channels dimension\n    x_train = x_train[..., tf.newaxis]\n    x_test = x_test[..., tf.newaxis]\n\n    # create data generator\n    train_ds = tf.data.Dataset.from_tensor_slices(\n        (x_train, y_train)).shuffle(10000).batch(32)\n    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)\n\n    # create model\n    model = MyModel()\n\n    # define loss\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()\n    # define optimizer\n    optimizer = tf.keras.optimizers.Adam()\n\n    # define train_loss and train_accuracy\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    # define train_loss and train_accuracy\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')\n\n    # define train function including calculating loss, applying gradient and calculating accuracy\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            predictions = model(images)\n            loss = loss_object(labels, predictions)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, predictions)\n\n    # define test function including calculating loss and calculating accuracy\n    @tf.function\n    def test_step(images, labels):\n        predictions = model(images)\n        t_loss = loss_object(labels, 
predictions)\n\n        test_loss(t_loss)\n        test_accuracy(labels, predictions)\n\n    EPOCHS = 5\n\n    for epoch in range(EPOCHS):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        test_accuracy.reset_states()     # clear history info\n\n        for images, labels in train_ds:\n            train_step(images, labels)\n\n        for test_images, test_labels in test_ds:\n            test_step(test_images, test_labels)\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        print(template.format(epoch + 1,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                              test_accuracy.result() * 100))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/fine_train_alexnet.py",
    "content": "from tensorflow.keras.preprocessing.image import ImageDataGenerator\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport tensorflow as tf\nimport json\nimport os\nimport glob\nfrom tensorflow.keras import layers, models\n\n\ndef AlexNet_pytorch(im_height=224, im_width=224, num_classes=1000):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")  # output(None, 224, 224, 3)\n    x = layers.ZeroPadding2D(((2, 1), (2, 1)))(input_image)                      # output(None, 227, 227, 3)\n    x = layers.Conv2D(64, kernel_size=11, strides=4, activation=\"relu\")(x)       # output(None, 55, 55, 64)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 27, 27, 64)\n    x = layers.Conv2D(192, kernel_size=5, padding=\"same\", activation=\"relu\")(x)  # output(None, 27, 27, 192)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 13, 13, 128)\n    x = layers.Conv2D(384, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 384)\n    x = layers.Conv2D(256, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 256)\n    x = layers.Conv2D(256, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 256)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 6, 6, 256)\n\n    x = layers.Flatten()(x)                         # output(None, 6*6*256)\n    x = layers.Dropout(0.5)(x)\n    x = layers.Dense(4096, activation=\"relu\")(x)    # output(None, 4096)\n    x = layers.Dropout(0.5)(x)\n    x = layers.Dense(4096, activation=\"relu\")(x)    # output(None, 4096)\n    x = layers.Dense(num_classes)(x)                  # output(None, 5)\n    predict = layers.Softmax()(x)\n\n    model = models.Model(inputs=input_image, outputs=predict)\n    return model\n\n\ndef main():\n    data_root = 
os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create directory for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    def pre_function(img: np.ndarray):\n        # from PIL import Image as im\n        # import numpy as np\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img / 255.\n        img = img - [0.485, 0.456, 0.406]\n        img = img / [0.229, 0.224, 0.225]\n\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True,\n                                               preprocessing_function=pre_function)\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = 
json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    model = AlexNet_pytorch(im_height=im_height, im_width=im_width, num_classes=5)\n\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path)\n    for layer_t in model.layers:\n        if 'conv2d' in layer_t.name:\n            layer_t.trainable = False\n\n    model.summary()\n\n    # using keras high level api for training\n    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),\n                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n                  metrics=[\"accuracy\"])\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',\n                                                    save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor='val_loss')]\n\n    # tensorflow2.1 recommend to using fit\n    history = model.fit(x=train_data_gen,\n                        steps_per_epoch=total_train // batch_size,\n                        epochs=epochs,\n                       
 validation_data=val_data_gen,\n                        validation_steps=total_val // batch_size,\n                        callbacks=callbacks)\n\n    # plot loss and accuracy image\n    history_dict = history.history\n    train_loss = history_dict[\"loss\"]\n    train_accuracy = history_dict[\"accuracy\"]\n    val_loss = history_dict[\"val_loss\"]\n    val_accuracy = history_dict[\"val_accuracy\"]\n\n    # figure 1\n    plt.figure()\n    plt.plot(range(epochs), train_loss, label='train_loss')\n    plt.plot(range(epochs), val_loss, label='val_loss')\n    plt.legend()\n    plt.xlabel('epochs')\n    plt.ylabel('loss')\n\n    # figure 2\n    plt.figure()\n    plt.plot(range(epochs), train_accuracy, label='train_accuracy')\n    plt.plot(range(epochs), val_accuracy, label='val_accuracy')\n    plt.legend()\n    plt.xlabel('epochs')\n    plt.ylabel('accuracy')\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/model.py",
    "content": "from tensorflow.keras import layers, models, Model, Sequential\n\n\ndef AlexNet_v1(im_height=224, im_width=224, num_classes=1000):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")  # output(None, 224, 224, 3)\n    x = layers.ZeroPadding2D(((1, 2), (1, 2)))(input_image)                      # output(None, 227, 227, 3)\n    x = layers.Conv2D(48, kernel_size=11, strides=4, activation=\"relu\")(x)       # output(None, 55, 55, 48)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 27, 27, 48)\n    x = layers.Conv2D(128, kernel_size=5, padding=\"same\", activation=\"relu\")(x)  # output(None, 27, 27, 128)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 13, 13, 128)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 192)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 192)\n    x = layers.Conv2D(128, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 128)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 6, 6, 128)\n\n    x = layers.Flatten()(x)                         # output(None, 6*6*128)\n    x = layers.Dropout(0.2)(x)\n    x = layers.Dense(2048, activation=\"relu\")(x)    # output(None, 2048)\n    x = layers.Dropout(0.2)(x)\n    x = layers.Dense(2048, activation=\"relu\")(x)    # output(None, 2048)\n    x = layers.Dense(num_classes)(x)                  # output(None, 5)\n    predict = layers.Softmax()(x)\n\n    model = models.Model(inputs=input_image, outputs=predict)\n    return model\n\n\nclass AlexNet_v2(Model):\n    def __init__(self, num_classes=1000):\n        super(AlexNet_v2, self).__init__()\n        self.features = Sequential([\n            layers.ZeroPadding2D(((1, 2), (1, 2))),         
                        # output(None, 227, 227, 3)\n            layers.Conv2D(48, kernel_size=11, strides=4, activation=\"relu\"),        # output(None, 55, 55, 48)\n            layers.MaxPool2D(pool_size=3, strides=2),                               # output(None, 27, 27, 48)\n            layers.Conv2D(128, kernel_size=5, padding=\"same\", activation=\"relu\"),   # output(None, 27, 27, 128)\n            layers.MaxPool2D(pool_size=3, strides=2),                               # output(None, 13, 13, 128)\n            layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 192)\n            layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 192)\n            layers.Conv2D(128, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 128)\n            layers.MaxPool2D(pool_size=3, strides=2)])                              # output(None, 6, 6, 128)\n\n        self.flatten = layers.Flatten()\n        self.classifier = Sequential([\n            layers.Dropout(0.2),\n            layers.Dense(1024, activation=\"relu\"),                                  # output(None, 1024)\n            layers.Dropout(0.2),\n            layers.Dense(128, activation=\"relu\"),                                   # output(None, 128)\n            layers.Dense(num_classes),                                                # output(None, 5)\n            layers.Softmax()\n        ])\n\n    def call(self, inputs, **kwargs):\n        x = self.features(inputs)\n        x = self.flatten(x)\n        x = self.classifier(x)\n        return x\n"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/predict.py",
    "content": "import os\nimport json\n\nfrom PIL import Image\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom model import AlexNet_v1, AlexNet_v2\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value to (0-1)\n    img = np.array(img) / 255.\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = AlexNet_v1(num_classes=5)\n    weighs_path = \"./save_weights/myAlex.h5\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(weighs_path)\n    model.load_weights(weighs_path)\n\n    # prediction\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/read_pth.py",
    "content": "import torch\nimport numpy as np\nimport tensorflow as tf\n\n\ndef rename_var(pth_path, new_ckpt_path, num_classes):\n    pytorch_dict = torch.load(pth_path)\n\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        new_var_list = []\n\n        for key, value in pytorch_dict.items():\n            if key in except_list:\n                continue\n\n            new_name = key\n            value = value.detach().numpy()\n\n            if 'features.0' in new_name:\n                new_name = new_name.replace(\"features.0.weight\", \"conv2d/kernel\")\n                new_name = new_name.replace(\"features.0.bias\", \"conv2d/bias\")\n\n            if 'features.3' in new_name:\n                new_name = new_name.replace(\"features.3.weight\", \"conv2d_1/kernel\")\n                new_name = new_name.replace(\"features.3.bias\", \"conv2d_1/bias\")\n\n            if 'features.6' in new_name:\n                new_name = new_name.replace(\"features.6.weight\", \"conv2d_2/kernel\")\n                new_name = new_name.replace(\"features.6.bias\", \"conv2d_2/bias\")\n\n            if 'features.8' in new_name:\n                new_name = new_name.replace(\"features.8.weight\", \"conv2d_3/kernel\")\n                new_name = new_name.replace(\"features.8.bias\", \"conv2d_3/bias\")\n\n            if 'features.10' in new_name:\n                new_name = new_name.replace(\"features.10.weight\", \"conv2d_4/kernel\")\n                new_name = new_name.replace(\"features.10.bias\", \"conv2d_4/bias\")\n\n            if 'classifier.1' in new_name:\n                new_name = new_name.replace(\"classifier.1.weight\", \"dense/kernel\")\n                new_name = new_name.replace(\"classifier.1.bias\", \"dense/bias\")\n\n            if 'classifier.4' in new_name:\n                new_name = new_name.replace(\"classifier.4.weight\", \"dense_1/kernel\")\n                new_name = new_name.replace(\"classifier.4.bias\", 
\"dense_1/bias\")\n\n            if 'conv2d' in new_name and 'kernel' in new_name:\n                value = np.transpose(value, (2, 3, 1, 0)).astype(np.float32)\n            else:\n                value = np.transpose(value).astype(np.float32)\n\n            re_var = tf.Variable(value, name=new_name)\n            new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([4096, num_classes]), name=\"dense_2/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"dense_2/bias\")\n        new_var_list.append(re_var)\n\n        saver = tf.compat.v1.train.Saver(new_var_list)\n        sess.run(tf.compat.v1.global_variables_initializer())\n        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)\n\n\nexcept_list = ['classifier.6.weight', 'classifier.6.bias']\n# https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth\npth_path = './alexnet-owt-4df8aa71.pth'\nnew_ckpt_path = './pretrain_weights.ckpt'\nnum_classes = 5\nrename_var(pth_path, new_ckpt_path, num_classes)"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/train.py",
    "content": "from tensorflow.keras.preprocessing.image import ImageDataGenerator\nimport matplotlib.pyplot as plt\nfrom model import AlexNet_v1, AlexNet_v2\nimport tensorflow as tf\nimport json\nimport os\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(rescale=1. / 255,\n                                               horizontal_flip=True)\n    validation_image_generator = ImageDataGenerator(rescale=1. 
/ 255)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    # sample_training_images, sample_training_labels = next(train_data_gen)  # label is one-hot coding\n    #\n    # # This function will plot images in the form of a grid with 1 row\n    # # and 5 columns where images are placed in each column.\n    # def plotImages(images_arr):\n    #     fig, axes = plt.subplots(1, 5, figsize=(20, 20))\n    #     axes = axes.flatten()\n    #     for img, ax in zip(images_arr, axes):\n    #         ax.imshow(img)\n    #         
ax.axis('off')\n    #     plt.tight_layout()\n    #     plt.show()\n    #\n    #\n    # plotImages(sample_training_images[:5])\n\n    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=5)\n    # model = AlexNet_v2(class_num=5)\n    # model.build((batch_size, 224, 224, 3))  # when using subclass model\n    model.summary()\n\n    # using keras high level api for training\n    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),\n                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n                  metrics=[\"accuracy\"])\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex.h5',\n                                                    save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor='val_loss')]\n\n    # tensorflow2.1 recommend to using fit\n    history = model.fit(x=train_data_gen,\n                        steps_per_epoch=total_train // batch_size,\n                        epochs=epochs,\n                        validation_data=val_data_gen,\n                        validation_steps=total_val // batch_size,\n                        callbacks=callbacks)\n\n    # plot loss and accuracy image\n    history_dict = history.history\n    train_loss = history_dict[\"loss\"]\n    train_accuracy = history_dict[\"accuracy\"]\n    val_loss = history_dict[\"val_loss\"]\n    val_accuracy = history_dict[\"val_accuracy\"]\n\n    # figure 1\n    plt.figure()\n    plt.plot(range(epochs), train_loss, label='train_loss')\n    plt.plot(range(epochs), val_loss, label='val_loss')\n    plt.legend()\n    plt.xlabel('epochs')\n    plt.ylabel('loss')\n\n    # figure 2\n    plt.figure()\n    plt.plot(range(epochs), train_accuracy, label='train_accuracy')\n    plt.plot(range(epochs), val_accuracy, label='val_accuracy')\n    plt.legend()\n    plt.xlabel('epochs')\n    
plt.ylabel('accuracy')\n    plt.show()\n\n    # history = model.fit_generator(generator=train_data_gen,\n    #                               steps_per_epoch=total_train // batch_size,\n    #                               epochs=epochs,\n    #                               validation_data=val_data_gen,\n    #                               validation_steps=total_val // batch_size,\n    #                               callbacks=callbacks)\n\n    # # using keras low level api for training\n    # loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    # optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n    #\n    # train_loss = tf.keras.metrics.Mean(name='train_loss')\n    # train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n    #\n    # test_loss = tf.keras.metrics.Mean(name='test_loss')\n    # test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n    #\n    #\n    # @tf.function\n    # def train_step(images, labels):\n    #     with tf.GradientTape() as tape:\n    #         predictions = model(images, training=True)\n    #         loss = loss_object(labels, predictions)\n    #     gradients = tape.gradient(loss, model.trainable_variables)\n    #     optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n    #\n    #     train_loss(loss)\n    #     train_accuracy(labels, predictions)\n    #\n    #\n    # @tf.function\n    # def test_step(images, labels):\n    #     predictions = model(images, training=False)\n    #     t_loss = loss_object(labels, predictions)\n    #\n    #     test_loss(t_loss)\n    #     test_accuracy(labels, predictions)\n    #\n    #\n    # best_test_loss = float('inf')\n    # for epoch in range(1, epochs+1):\n    #     train_loss.reset_states()        # clear history info\n    #     train_accuracy.reset_states()    # clear history info\n    #     test_loss.reset_states()         # clear history info\n    #     test_accuracy.reset_states()     # clear 
history info\n    #     for step in range(total_train // batch_size):\n    #         images, labels = next(train_data_gen)\n    #         train_step(images, labels)\n    #\n    #     for step in range(total_val // batch_size):\n    #         test_images, test_labels = next(val_data_gen)\n    #         test_step(test_images, test_labels)\n    #\n    #     template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n    #     print(template.format(epoch,\n    #                           train_loss.result(),\n    #                           train_accuracy.result() * 100,\n    #                           test_loss.result(),\n    #                           test_accuracy.result() * 100))\n    #     if test_loss.result() < best_test_loss:\n    #        model.save_weights(\"./save_weights/myAlex.ckpt\", save_format='tf')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test2_alexnet/trainGPU.py",
    "content": "import matplotlib.pyplot as plt\nfrom model import AlexNet_v1, AlexNet_v2\nimport tensorflow as tf\nimport json\nimport os\nimport time\nimport glob\nimport random\nos.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n\ndef main():\n    gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n    if gpus:\n        try:\n            for gpu in gpus:\n                tf.config.experimental.set_memory_growth(gpu, True)\n        except RuntimeError as e:\n            print(e)\n            exit(-1)\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    # class dict\n    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]\n    class_num = len(data_class)\n    class_dict = dict((value, index) for index, value in enumerate(data_class))\n\n    # reverse value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_dict.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # load train images list\n    train_image_list = glob.glob(train_dir+\"/*/*.jpg\")\n    random.shuffle(train_image_list)\n    train_num = len(train_image_list)\n    assert train_num > 0, \"cannot find any .jpg file in 
{}\".format(train_dir)\n    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]\n\n    # load validation images list\n    val_image_list = glob.glob(validation_dir+\"/*/*.jpg\")\n    random.shuffle(val_image_list)\n    val_num = len(val_image_list)\n    assert val_num > 0, \"cannot find any .jpg file in {}\".format(validation_dir)\n    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    def process_path(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        return image, label\n\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    # load train dataset\n    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))\n    train_dataset = train_dataset.shuffle(buffer_size=train_num)\\\n                                 .map(process_path, num_parallel_calls=AUTOTUNE)\\\n                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)\n\n    # load train dataset\n    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))\n    val_dataset = val_dataset.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)\\\n                             .repeat().batch(batch_size)\n\n    # 实例化模型\n    model = AlexNet_v1(im_height=im_height, im_width=im_width, num_classes=5)\n    # model = AlexNet_v2(class_num=5)\n    # model.build((batch_size, 224, 224, 3))  # when using subclass model\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = 
tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            predictions = model(images, training=True)\n            loss = loss_object(labels, predictions)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, predictions)\n\n    @tf.function\n    def test_step(images, labels):\n        predictions = model(images, training=False)\n        t_loss = loss_object(labels, predictions)\n\n        test_loss(t_loss)\n        test_accuracy(labels, predictions)\n\n    best_test_loss = float('inf')\n    train_step_num = train_num // batch_size\n    val_step_num = val_num // batch_size\n    for epoch in range(1, epochs+1):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        test_accuracy.reset_states()     # clear history info\n\n        t1 = time.perf_counter()\n        for index, (images, labels) in enumerate(train_dataset):\n            train_step(images, labels)\n            if index+1 == train_step_num:\n                break\n        print(time.perf_counter()-t1)\n\n        for index, (images, labels) in enumerate(val_dataset):\n            test_step(images, labels)\n            if index+1 == val_step_num:\n                break\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        
print(template.format(epoch,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                              test_accuracy.result() * 100))\n        if test_loss.result() < best_test_loss:\n            best_test_loss = test_loss.result()\n            model.save_weights(\"./save_weights/myAlex.ckpt\".format(epoch), save_format='tf')\n\n    # # using keras high level api for training\n    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),\n    #               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n    #               metrics=[\"accuracy\"])\n    #\n    # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex_{epoch}.h5',\n    #                                                 save_best_only=True,\n    #                                                 save_weights_only=True,\n    #                                                 monitor='val_loss')]\n    #\n    # # tensorflow2.1 recommend to using fit\n    # history = model.fit(x=train_dataset,\n    #                     steps_per_epoch=train_num // batch_size,\n    #                     epochs=epochs,\n    #                     validation_data=val_dataset,\n    #                     validation_steps=val_num // batch_size,\n    #                     callbacks=callbacks)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/fine_train_vgg16.py",
    "content": "from tensorflow.keras.preprocessing.image import ImageDataGenerator\nimport matplotlib.pyplot as plt\nfrom model import vgg\nimport tensorflow as tf\nimport json\nimport os\nimport glob\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    _R_MEAN = 123.68\n    _G_MEAN = 116.78\n    _B_MEAN = 103.94\n\n    def pre_function(img):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img - [_R_MEAN, _G_MEAN, _B_MEAN]\n\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True,\n                                               preprocessing_function=pre_function)\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # 
transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    model = vgg(\"vgg16\", 224, 224, 5)\n\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path)\n    for layer_t in model.layers:\n        if layer_t.name == 'feature':\n            layer_t.trainable = False\n            break\n\n    model.summary()\n\n    # using keras high level api for training\n    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),\n                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n                  metrics=[\"accuracy\"])\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myAlex_{epoch}.h5',\n                                                    save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor='val_loss')]\n\n    # tensorflow2.1 recommend to using fit\n    history = 
model.fit(x=train_data_gen,\n                        steps_per_epoch=total_train // batch_size,\n                        epochs=epochs,\n                        validation_data=val_data_gen,\n                        validation_steps=total_val // batch_size,\n                        callbacks=callbacks)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/model.py",
    "content": "from tensorflow.keras import layers, Model, Sequential\n\nCONV_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 2.0,\n        'mode': 'fan_out',\n        'distribution': 'truncated_normal'\n    }\n}\n\nDENSE_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 1. / 3.,\n        'mode': 'fan_out',\n        'distribution': 'uniform'\n    }\n}\n\n\ndef VGG(feature, im_height=224, im_width=224, num_classes=1000):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")\n    x = feature(input_image)\n    x = layers.Flatten()(x)\n    x = layers.Dropout(rate=0.5)(x)\n    x = layers.Dense(2048, activation='relu',\n                     kernel_initializer=DENSE_KERNEL_INITIALIZER)(x)\n    x = layers.Dropout(rate=0.5)(x)\n    x = layers.Dense(2048, activation='relu',\n                     kernel_initializer=DENSE_KERNEL_INITIALIZER)(x)\n    x = layers.Dense(num_classes,\n                     kernel_initializer=DENSE_KERNEL_INITIALIZER)(x)\n    output = layers.Softmax()(x)\n    model = Model(inputs=input_image, outputs=output)\n    return model\n\n\ndef make_feature(cfg):\n    feature_layers = []\n    for v in cfg:\n        if v == \"M\":\n            feature_layers.append(layers.MaxPool2D(pool_size=2, strides=2))\n        else:\n            conv2d = layers.Conv2D(v, kernel_size=3, padding=\"SAME\", activation=\"relu\",\n                                   kernel_initializer=CONV_KERNEL_INITIALIZER)\n            feature_layers.append(conv2d)\n    return Sequential(feature_layers, name=\"feature\")\n\n\ncfgs = {\n    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n    'vgg19': [64, 64, 'M', 128, 
128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n}\n\n\ndef vgg(model_name=\"vgg16\", im_height=224, im_width=224, num_classes=1000):\n    assert model_name in cfgs.keys(), \"not support model {}\".format(model_name)\n    cfg = cfgs[model_name]\n    model = VGG(make_feature(cfg), im_height=im_height, im_width=im_width, num_classes=num_classes)\n    return model\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/predict.py",
    "content": "import os\nimport json\n\nfrom PIL import Image\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom model import vgg\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n    num_classes = 5\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' does not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value to (0-1)\n    img = np.array(img) / 255.\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' does not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = vgg(\"vgg16\", im_height=im_height, im_width=im_width, num_classes=num_classes)\n    weights_path = \"./save_weights/myVGG.h5\"\n    assert os.path.exists(weights_path), \"file: '{}' does not exist.\".format(weights_path)\n    model.load_weights(weights_path)\n\n    # prediction\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/read_ckpt.py",
    "content": "import tensorflow as tf\n\n\ndef rename_var(ckpt_path, new_ckpt_path, num_classes=5):\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        var_list = tf.train.list_variables(ckpt_path)\n        new_var_list = []\n\n        for var_name, shape in var_list:\n            # print(var_name)\n            if var_name in except_list:\n                continue\n\n            var = tf.train.load_variable(ckpt_path, var_name)\n            new_var_name = var_name.replace('vgg_16', 'feature')\n            new_var_name = new_var_name.replace(\"weights\", \"kernel\")\n            new_var_name = new_var_name.replace(\"biases\", \"bias\")\n\n            new_var_name = new_var_name.replace(\"conv1/conv1_1\", \"conv2d\")\n            new_var_name = new_var_name.replace(\"conv1/conv1_2\", \"conv2d_1\")\n\n            new_var_name = new_var_name.replace(\"conv2/conv2_1\", \"conv2d_2\")\n            new_var_name = new_var_name.replace(\"conv2/conv2_2\", \"conv2d_3\")\n\n            new_var_name = new_var_name.replace(\"conv3/conv3_1\", \"conv2d_4\")\n            new_var_name = new_var_name.replace(\"conv3/conv3_2\", \"conv2d_5\")\n            new_var_name = new_var_name.replace(\"conv3/conv3_3\", \"conv2d_6\")\n\n            new_var_name = new_var_name.replace(\"conv4/conv4_1\", \"conv2d_7\")\n            new_var_name = new_var_name.replace(\"conv4/conv4_2\", \"conv2d_8\")\n            new_var_name = new_var_name.replace(\"conv4/conv4_3\", \"conv2d_9\")\n\n            new_var_name = new_var_name.replace(\"conv5/conv5_1\", \"conv2d_10\")\n            new_var_name = new_var_name.replace(\"conv5/conv5_2\", \"conv2d_11\")\n            new_var_name = new_var_name.replace(\"conv5/conv5_3\", \"conv2d_12\")\n\n            if 'fc' in new_var_name:\n                # new_var_name = new_var_name.replace(\"feature/fc6\", \"dense\")\n                # new_var_name = new_var_name.replace(\"feature/fc7\", \"dense_1\")\n                # new_var_name 
= new_var_name.replace(\"fc8\", \"dense_2\")\n                continue\n\n        #     print(new_var_name)\n            re_var = tf.Variable(var, name=new_var_name)\n            new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([25088, 2048]), name=\"dense/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048]), name=\"dense/bias\")\n        new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, 2048]), name=\"dense_1/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048]), name=\"dense_1/bias\")\n        new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name=\"dense_2/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"dense_2/bias\")\n        new_var_list.append(re_var)\n\n        saver = tf.compat.v1.train.Saver(new_var_list)\n        sess.run(tf.compat.v1.global_variables_initializer())\n        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)\n\n\nexcept_list = ['global_step', 'vgg_16/mean_rgb', 'vgg_16/fc8/biases', 'vgg_16/fc8/weights']\n# http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz\nckpt_path = './vgg_16.ckpt'\nnew_ckpt_path = './pretrain_weights.ckpt'\nnum_classes = 5\nrename_var(ckpt_path, new_ckpt_path, num_classes)\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/train.py",
    "content": "from tensorflow.keras.preprocessing.image import ImageDataGenerator\nimport matplotlib.pyplot as plt\nfrom model import vgg\nimport tensorflow as tf\nimport json\nimport os\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(rescale=1. / 255,\n                                               horizontal_flip=True)\n    validation_image_generator = ImageDataGenerator(rescale=1. 
/ 255)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    model = vgg(\"vgg16\", im_height, im_width, num_classes=5)\n    model.summary()\n\n    # using keras high level api for training\n    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),\n                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n                  metrics=[\"accuracy\"])\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG.h5',\n                                              
      save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor='val_loss')]\n\n    # tensorflow2.1 recommend to using fit\n    history = model.fit(x=train_data_gen,\n                        steps_per_epoch=total_train // batch_size,\n                        epochs=epochs,\n                        validation_data=val_data_gen,\n                        validation_steps=total_val // batch_size,\n                        callbacks=callbacks)\n\n    # plot loss and accuracy image\n    history_dict = history.history\n    train_loss = history_dict[\"loss\"]\n    train_accuracy = history_dict[\"accuracy\"]\n    val_loss = history_dict[\"val_loss\"]\n    val_accuracy = history_dict[\"val_accuracy\"]\n\n    # figure 1\n    plt.figure()\n    plt.plot(range(epochs), train_loss, label='train_loss')\n    plt.plot(range(epochs), val_loss, label='val_loss')\n    plt.legend()\n    plt.xlabel('epochs')\n    plt.ylabel('loss')\n\n    # figure 2\n    plt.figure()\n    plt.plot(range(epochs), train_accuracy, label='train_accuracy')\n    plt.plot(range(epochs), val_accuracy, label='val_accuracy')\n    plt.legend()\n    plt.xlabel('epochs')\n    plt.ylabel('accuracy')\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test3_vgg/trainGPU.py",
    "content": "import matplotlib.pyplot as plt\nfrom model import vgg\nimport tensorflow as tf\nimport json\nimport os\nimport time\nimport glob\nimport random\nos.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n\ndef main():\n    gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n    if gpus:\n        try:\n            for gpu in gpus:\n                tf.config.experimental.set_memory_growth(gpu, True)\n        except RuntimeError as e:\n            print(e)\n            exit(-1)\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 10\n\n    # class dict\n    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]\n    class_num = len(data_class)\n    class_dict = dict((value, index) for index, value in enumerate(data_class))\n\n    # reverse value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_dict.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # load train images list\n    train_image_list = glob.glob(train_dir+\"/*/*.jpg\")\n    random.shuffle(train_image_list)\n    train_num = len(train_image_list)\n    assert train_num > 0, \"cannot find any .jpg file in {}\".format(train_dir)\n    
train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]\n\n    # load validation images list\n    val_image_list = glob.glob(validation_dir+\"/*/*.jpg\")\n    random.shuffle(val_image_list)\n    val_num = len(val_image_list)\n    assert val_num > 0, \"cannot find any .jpg file in {}\".format(validation_dir)\n    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    def process_path(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        return image, label\n\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    # load train dataset\n    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))\n    train_dataset = train_dataset.shuffle(buffer_size=train_num)\\\n                                 .map(process_path, num_parallel_calls=AUTOTUNE)\\\n                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)\n\n    # load validation dataset\n    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))\n    val_dataset = val_dataset.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)\\\n                             .repeat().batch(batch_size)\n\n    # instantiate the model\n    model = vgg(\"vgg16\", 224, 224, 5)\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = 
tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            predictions = model(images, training=True)\n            loss = loss_object(labels, predictions)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, predictions)\n\n    @tf.function\n    def test_step(images, labels):\n        predictions = model(images, training=False)\n        t_loss = loss_object(labels, predictions)\n\n        test_loss(t_loss)\n        test_accuracy(labels, predictions)\n\n    best_test_loss = float('inf')\n    train_step_num = train_num // batch_size\n    val_step_num = val_num // batch_size\n    for epoch in range(1, epochs+1):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        test_accuracy.reset_states()     # clear history info\n\n        t1 = time.perf_counter()\n        for index, (images, labels) in enumerate(train_dataset):\n            train_step(images, labels)\n            if index+1 == train_step_num:\n                break\n        print(time.perf_counter()-t1)\n\n        for index, (images, labels) in enumerate(val_dataset):\n            test_step(images, labels)\n            if index+1 == val_step_num:\n                break\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        print(template.format(epoch,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                
              test_accuracy.result() * 100))\n        if test_loss.result() < best_test_loss:\n            model.save_weights(\"./save_weights/myVGG.ckpt\".format(epoch), save_format='tf')\n\n    # # using keras high level api for training\n    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),\n    #               loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),\n    #               metrics=[\"accuracy\"])\n    #\n    # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/myVGG_{epoch}.h5',\n    #                                                 save_best_only=True,\n    #                                                 save_weights_only=True,\n    #                                                 monitor='val_loss')]\n    #\n    # # tensorflow2.1 recommend to using fit\n    # history = model.fit(x=train_dataset,\n    #                     steps_per_epoch=train_num // batch_size,\n    #                     epochs=epochs,\n    #                     validation_data=val_dataset,\n    #                     validation_steps=val_num // batch_size,\n    #                     callbacks=callbacks)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/model.py",
    "content": "from tensorflow.keras import layers, models, Model, Sequential\n\n\ndef GoogLeNet(im_height=224, im_width=224, class_num=1000, aux_logits=False):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")\n    # (None, 224, 224, 3)\n    x = layers.Conv2D(64, kernel_size=7, strides=2, padding=\"SAME\", activation=\"relu\", name=\"conv2d_1\")(input_image)\n    # (None, 112, 112, 64)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_1\")(x)\n    # (None, 56, 56, 64)\n    x = layers.Conv2D(64, kernel_size=1, activation=\"relu\", name=\"conv2d_2\")(x)\n    # (None, 56, 56, 64)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"SAME\", activation=\"relu\", name=\"conv2d_3\")(x)\n    # (None, 56, 56, 192)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_2\")(x)\n\n    # (None, 28, 28, 192)\n    x = Inception(64, 96, 128, 16, 32, 32, name=\"inception_3a\")(x)\n    # (None, 28, 28, 256)\n    x = Inception(128, 128, 192, 32, 96, 64, name=\"inception_3b\")(x)\n\n    # (None, 28, 28, 480)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_3\")(x)\n    # (None, 14, 14, 480)\n    x = Inception(192, 96, 208, 16, 48, 64, name=\"inception_4a\")(x)\n    if aux_logits:\n        aux1 = InceptionAux(class_num, name=\"aux_1\")(x)\n\n    # (None, 14, 14, 512)\n    x = Inception(160, 112, 224, 24, 64, 64, name=\"inception_4b\")(x)\n    # (None, 14, 14, 512)\n    x = Inception(128, 128, 256, 24, 64, 64, name=\"inception_4c\")(x)\n    # (None, 14, 14, 512)\n    x = Inception(112, 144, 288, 32, 64, 64, name=\"inception_4d\")(x)\n    if aux_logits:\n        aux2 = InceptionAux(class_num, name=\"aux_2\")(x)\n\n    # (None, 14, 14, 528)\n    x = Inception(256, 160, 320, 32, 128, 128, name=\"inception_4e\")(x)\n    # (None, 14, 14, 532)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", 
name=\"maxpool_4\")(x)\n\n    # (None, 7, 7, 832)\n    x = Inception(256, 160, 320, 32, 128, 128, name=\"inception_5a\")(x)\n    # (None, 7, 7, 832)\n    x = Inception(384, 192, 384, 48, 128, 128, name=\"inception_5b\")(x)\n    # (None, 7, 7, 1024)\n    x = layers.AvgPool2D(pool_size=7, strides=1, name=\"avgpool_1\")(x)\n\n    # (None, 1, 1, 1024)\n    x = layers.Flatten(name=\"output_flatten\")(x)\n    # (None, 1024)\n    x = layers.Dropout(rate=0.4, name=\"output_dropout\")(x)\n    x = layers.Dense(class_num, name=\"output_dense\")(x)\n    # (None, class_num)\n    aux3 = layers.Softmax(name=\"aux_3\")(x)\n\n    if aux_logits:\n        model = models.Model(inputs=input_image, outputs=[aux1, aux2, aux3])\n    else:\n        model = models.Model(inputs=input_image, outputs=aux3)\n    return model\n\n\nclass Inception(layers.Layer):\n    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):\n        super(Inception, self).__init__(**kwargs)\n        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation=\"relu\")\n\n        self.branch2 = Sequential([\n            layers.Conv2D(ch3x3red, kernel_size=1, activation=\"relu\"),\n            layers.Conv2D(ch3x3, kernel_size=3, padding=\"SAME\", activation=\"relu\")])      # output_size= input_size\n\n        self.branch3 = Sequential([\n            layers.Conv2D(ch5x5red, kernel_size=1, activation=\"relu\"),\n            layers.Conv2D(ch5x5, kernel_size=5, padding=\"SAME\", activation=\"relu\")])      # output_size= input_size\n\n        self.branch4 = Sequential([\n            layers.MaxPool2D(pool_size=3, strides=1, padding=\"SAME\"),  # caution: default strides==pool_size\n            layers.Conv2D(pool_proj, kernel_size=1, activation=\"relu\")])                  # output_size= input_size\n\n    def call(self, inputs, **kwargs):\n        branch1 = self.branch1(inputs)\n        branch2 = self.branch2(inputs)\n        branch3 = self.branch3(inputs)\n        branch4 = 
self.branch4(inputs)\n        outputs = layers.concatenate([branch1, branch2, branch3, branch4])\n        return outputs\n\n\nclass InceptionAux(layers.Layer):\n    def __init__(self, num_classes, **kwargs):\n        super(InceptionAux, self).__init__(**kwargs)\n        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)\n        self.conv = layers.Conv2D(128, kernel_size=1, activation=\"relu\")\n\n        self.fc1 = layers.Dense(1024, activation=\"relu\")\n        self.fc2 = layers.Dense(num_classes)\n        self.softmax = layers.Softmax()\n\n    def call(self, inputs, **kwargs):\n        # aux1: N x 14 x 14 x 512, aux2: N x 14 x 14 x 528\n        x = self.averagePool(inputs)\n        # aux1: N x 4 x 4 x 512, aux2: N x 4 x 4 x 528\n        x = self.conv(x)\n        # N x 4 x 4 x 128\n        x = layers.Flatten()(x)\n        x = layers.Dropout(rate=0.5)(x)\n        # N x 2048\n        x = self.fc1(x)\n        x = layers.Dropout(rate=0.5)(x)\n        # N x 1024\n        x = self.fc2(x)\n        # N x num_classes\n        x = self.softmax(x)\n\n        return x\n\n\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/model_add_bn.py",
    "content": "from tensorflow.keras import layers, models, Model, Sequential\n\n\ndef InceptionV1(im_height=224, im_width=224, class_num=1000, aux_logits=False):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")\n    # (None, 224, 224, 3)\n    x = layers.Conv2D(64, kernel_size=7, strides=2, padding=\"SAME\", use_bias=False, name=\"conv1/conv\")(input_image)\n    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv1/bn\")(x)\n    x = layers.ReLU()(x)\n    # (None, 112, 112, 64)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_1\")(x)\n    # (None, 56, 56, 64)\n    x = layers.Conv2D(64, kernel_size=1, use_bias=False, name=\"conv2/conv\")(x)\n    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv2/bn\")(x)\n    x = layers.ReLU()(x)\n    # (None, 56, 56, 64)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"SAME\", use_bias=False, name=\"conv3/conv\")(x)\n    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv3/bn\")(x)\n    x = layers.ReLU()(x)\n    # (None, 56, 56, 192)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_2\")(x)\n\n    # (None, 28, 28, 192)\n    x = Inception(64, 96, 128, 16, 32, 32, name=\"inception3a\")(x)\n    # (None, 28, 28, 256)\n    x = Inception(128, 128, 192, 32, 96, 64, name=\"inception3b\")(x)\n\n    # (None, 28, 28, 480)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool_3\")(x)\n    # (None, 14, 14, 480)\n    x = Inception(192, 96, 208, 16, 48, 64, name=\"inception4a\")(x)\n    if aux_logits:\n        aux1 = InceptionAux(class_num, name=\"aux1\")(x)\n\n    # (None, 14, 14, 512)\n    x = Inception(160, 112, 224, 24, 64, 64, name=\"inception4b\")(x)\n    # (None, 14, 14, 512)\n    x = Inception(128, 128, 256, 24, 64, 64, name=\"inception4c\")(x)\n    # (None, 14, 14, 512)\n    x = Inception(112, 144, 288, 32, 64, 64, 
name=\"inception4d\")(x)\n    if aux_logits:\n        aux2 = InceptionAux(class_num, name=\"aux2\")(x)\n\n    # (None, 14, 14, 528)\n    x = Inception(256, 160, 320, 32, 128, 128, name=\"inception4e\")(x)\n    # (None, 14, 14, 832)\n    x = layers.MaxPool2D(pool_size=2, strides=2, padding=\"SAME\", name=\"maxpool_4\")(x)\n\n    # (None, 7, 7, 832)\n    x = Inception(256, 160, 320, 32, 128, 128, name=\"inception5a\")(x)\n    # (None, 7, 7, 832)\n    x = Inception(384, 192, 384, 48, 128, 128, name=\"inception5b\")(x)\n    # (None, 7, 7, 1024)\n    x = layers.AvgPool2D(pool_size=7, strides=1, name=\"avgpool_1\")(x)\n\n    # (None, 1, 1, 1024)\n    x = layers.Flatten(name=\"output_flatten\")(x)\n    # (None, 1024)\n    x = layers.Dropout(rate=0.4, name=\"output_dropout\")(x)\n    x = layers.Dense(class_num, name=\"fc\")(x)\n    # (None, class_num)\n    aux3 = layers.Softmax()(x)\n\n    if aux_logits:\n        model = models.Model(inputs=input_image, outputs=[aux1, aux2, aux3])\n    else:\n        model = models.Model(inputs=input_image, outputs=aux3)\n    return model\n\n\nclass Inception(layers.Layer):\n    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):\n        super(Inception, self).__init__(**kwargs)\n        self.branch1 = Sequential([\n            layers.Conv2D(ch1x1, kernel_size=1, use_bias=False, name=\"conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"bn\"),\n            layers.ReLU()], name=\"branch1\")\n\n        self.branch2 = Sequential([\n            layers.Conv2D(ch3x3red, kernel_size=1, use_bias=False, name=\"0/conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"0/bn\"),\n            layers.ReLU(),\n            layers.Conv2D(ch3x3, kernel_size=3, padding=\"SAME\", use_bias=False, name=\"1/conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"1/bn\"),\n            layers.ReLU()], name=\"branch2\")      # output_size= 
input_size\n\n        self.branch3 = Sequential([\n            layers.Conv2D(ch5x5red, kernel_size=1, use_bias=False, name=\"0/conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"0/bn\"),\n            layers.ReLU(),\n            layers.Conv2D(ch5x5, kernel_size=3, padding=\"SAME\", use_bias=False, name=\"1/conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"1/bn\"),\n            layers.ReLU()], name=\"branch3\")      # output_size= input_size\n\n        self.branch4 = Sequential([\n            layers.MaxPool2D(pool_size=3, strides=1, padding=\"SAME\"),  # caution: default strides==pool_size\n            layers.Conv2D(pool_proj, kernel_size=1, use_bias=False, name=\"1/conv\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"1/bn\"),\n            layers.ReLU()], name=\"branch4\")                  # output_size= input_size\n\n    def call(self, inputs, **kwargs):\n        branch1 = self.branch1(inputs)\n        branch2 = self.branch2(inputs)\n        branch3 = self.branch3(inputs)\n        branch4 = self.branch4(inputs)\n        outputs = layers.concatenate([branch1, branch2, branch3, branch4])\n        return outputs\n\n\nclass InceptionAux(layers.Layer):\n    def __init__(self, num_classes, **kwargs):\n        super(InceptionAux, self).__init__(**kwargs)\n        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)\n        self.conv = layers.Conv2D(128, kernel_size=1, use_bias=False, name=\"conv/conv\")\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv/bn\")\n        self.rule1 = layers.ReLU()\n\n        self.fc1 = layers.Dense(1024, activation=\"relu\", name=\"fc1\")\n        self.fc2 = layers.Dense(num_classes, name=\"fc2\")\n        self.softmax = layers.Softmax()\n\n    def call(self, inputs, **kwargs):\n        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14\n        x = self.averagePool(inputs)\n        # aux1: N x 512 x 4 x 4, 
aux2: N x 528 x 4 x 4\n        x = self.conv(x)\n        x = self.bn1(x)\n        x = self.rule1(x)\n        # N x 128 x 4 x 4\n        x = layers.Flatten()(x)\n        x = layers.Dropout(rate=0.5)(x)\n        # N x 2048\n        x = self.fc1(x)\n        x = layers.Dropout(rate=0.5)(x)\n        # N x 1024\n        x = self.fc2(x)\n        # N x num_classes\n        x = self.softmax(x)\n\n        return x\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/predict.py",
    "content": "import os\nimport glob\nimport json\n\nfrom PIL import Image\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom model import GoogLeNet\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value and normalize\n    img = ((np.array(img) / 255.) - 0.5) / 0.5\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    model = GoogLeNet(class_num=5, aux_logits=False)\n    model.summary()\n    # model.load_weights(\"./save_weights/myGoogLenet.h5\", by_name=True)  # h5 format\n    weights_path = \"./save_weights/myGoogLeNet.ckpt\"\n    assert len(glob.glob(weights_path + \"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/read_pth.py",
    "content": "import torch\nimport numpy as np\nimport tensorflow as tf\n\n\ndef rename_var(pth_path, new_ckpt_path, num_classes):\n    pytorch_dict = torch.load(pth_path)\n\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        new_var_list = []\n\n        for key, value in pytorch_dict.items():\n            if key in except_list:\n                continue\n\n            new_name = key\n            value = value.detach().numpy()\n\n            new_name = new_name.replace(\".\", \"/\")\n\n            # 将卷积核的通道顺序由pytorch调整到tensorflow\n            if 'conv/weight' in new_name:\n                new_name = new_name.replace(\"weight\", \"kernel\")\n                value = np.transpose(value, (2, 3, 1, 0)).astype(np.float32)\n            elif 'bn' in new_name:\n                if \"num_batches_tracked\" in new_name:\n                    continue\n\n                new_name = new_name.replace(\"weight\", \"gamma\")\n                new_name = new_name.replace(\"bias\", \"beta\")\n                new_name = new_name.replace(\"running_mean\", \"moving_mean\")\n                new_name = new_name.replace(\"running_var\", \"moving_variance\")\n\n                value = np.transpose(value).astype(np.float32)\n            elif 'fc1' in new_name:\n                new_name = new_name.replace(\"weight\", \"kernel\")\n                value = np.transpose(value).astype(np.float32)\n\n            re_var = tf.Variable(value, name=new_name)\n            new_var_list.append(re_var)\n\n        # aux1\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([1024, num_classes]), name=\"aux1/fc2/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"aux1/fc2/bias\")\n        new_var_list.append(re_var)\n\n        # aux2\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([1024, num_classes]), name=\"aux2/fc2/kernel\")\n        
new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"aux2/fc2/bias\")\n        new_var_list.append(re_var)\n\n        # fc\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([1024, num_classes]), name=\"fc/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"fc/bias\")\n        new_var_list.append(re_var)\n\n        saver = tf.compat.v1.train.Saver(new_var_list)\n        sess.run(tf.compat.v1.global_variables_initializer())\n        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)\n\n\n# this script only use for model_add_bn.py\nexcept_list = ['aux1.fc2.weight', 'aux1.fc2.bias', 'aux2.fc2.weight', 'aux2.fc2.bias', 'fc.weight', 'fc.bias']\n# https://download.pytorch.org/models/googlenet-1378be20.pth\npth_path = './googlenet-1378be20.pth'\nnew_ckpt_path = './pretrain_weights.ckpt'\nnum_classes = 5\nrename_var(pth_path, new_ckpt_path, num_classes)\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/train.py",
    "content": "import os\nimport sys\nimport json\n\nimport tensorflow as tf\nfrom tqdm import tqdm\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\n\nfrom model import GoogLeNet\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 30\n\n    def pre_function(img):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img / 255.\n        img = (img - 0.5) * 2.0\n\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,\n                                               horizontal_flip=True)\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = 
dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True)\n    # model.build((batch_size, 224, 224, 3))  # when using subclass model\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            aux1, aux2, output = model(images, training=True)\n            loss1 = loss_object(labels, aux1)\n            loss2 = loss_object(labels, aux2)\n            loss3 = loss_object(labels, output)\n            loss = loss1 * 0.3 + loss2 * 0.3 + loss3\n        gradients = tape.gradient(loss, model.trainable_variables)\n        
optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def val_step(images, labels):\n        _, _, output = model(images, training=False)\n        loss = loss_object(labels, output)\n\n        val_loss(loss)\n        val_accuracy(labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(range(total_train // batch_size), file=sys.stdout)\n        for step in train_bar:\n            images, labels = next(train_data_gen)\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        val_bar = tqdm(range(total_val // batch_size), file=sys.stdout)\n        for step in val_bar:\n            val_images, val_labels = next(val_data_gen)\n            val_step(val_images, val_labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n\n        # only save best weights\n        if 
val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/myGoogLeNet.ckpt\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/trainGPU.py",
    "content": "import matplotlib.pyplot as plt\nfrom model import GoogLeNet\nimport tensorflow as tf\nimport json\nimport os\nimport time\nimport glob\nimport random\nos.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n\ndef main():\n    gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n    if gpus:\n        try:\n            for gpu in gpus:\n                tf.config.experimental.set_memory_growth(gpu, True)\n        except RuntimeError as e:\n            print(e)\n            exit(-1)\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 30\n\n    # class dict\n    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]\n    class_num = len(data_class)\n    class_dict = dict((value, index) for index, value in enumerate(data_class))\n\n    # reverse value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_dict.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # load train images list\n    train_image_list = glob.glob(train_dir+\"/*/*.jpg\")\n    random.shuffle(train_image_list)\n    train_num = len(train_image_list)\n    assert train_num > 0, \"cannot find any .jpg file in 
{}\".format(train_dir)\n    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]\n\n    # load validation images list\n    val_image_list = glob.glob(validation_dir+\"/*/*.jpg\")\n    random.shuffle(val_image_list)\n    val_num = len(val_image_list)\n    assert val_num > 0, \"cannot find any .jpg file in {}\".format(validation_dir)\n    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    def process_train_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.random_flip_left_right(image)\n        image = (image - 0.5) / 0.5\n        return image, label\n\n    def process_val_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        image = (image - 0.5) / 0.5\n        return image, label\n\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    # load train dataset\n    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))\n    train_dataset = train_dataset.shuffle(buffer_size=train_num)\\\n                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\\\n                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)\n\n    # load train dataset\n    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))\n    val_dataset = 
val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\\\n                             .repeat().batch(batch_size)\n\n    # 实例化模型\n    model = GoogLeNet(im_height=224, im_width=224, class_num=5, aux_logits=True)\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            aux1, aux2, output = model(images, training=True)\n            loss1 = loss_object(labels, aux1)\n            loss2 = loss_object(labels, aux2)\n            loss3 = loss_object(labels, output)\n            loss = loss1 * 0.3 + loss2 * 0.3 + loss3\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def test_step(images, labels):\n        _, _, output = model(images, training=False)\n        t_loss = loss_object(labels, output)\n\n        test_loss(t_loss)\n        test_accuracy(labels, output)\n\n    best_test_loss = float('inf')\n    train_step_num = train_num // batch_size\n    val_step_num = val_num // batch_size\n    for epoch in range(1, epochs+1):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        test_accuracy.reset_states()     # clear history info\n\n        t1 = time.perf_counter()\n        for index, 
(images, labels) in enumerate(train_dataset):\n            train_step(images, labels)\n            if index+1 == train_step_num:\n                break\n        print(time.perf_counter()-t1)\n\n        for index, (images, labels) in enumerate(val_dataset):\n            test_step(images, labels)\n            if index+1 == val_step_num:\n                break\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        print(template.format(epoch,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                              test_accuracy.result() * 100))\n        if test_loss.result() < best_test_loss:\n            model.save_weights(\"./save_weights/myGoogLeNet.ckpt\".format(epoch), save_format='tf')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test4_goolenet/train_add_bn.py",
    "content": "import os\nimport sys\nimport json\nimport glob\n\nimport numpy as np\nfrom tqdm import tqdm\nimport tensorflow as tf\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\n\nfrom model_add_bn import InceptionV1\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 30\n\n    def pre_function(img: np.ndarray):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img / 255.\n        img = img - [0.485, 0.456, 0.406]\n        img = img / [0.229, 0.224, 0.225]\n\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,\n                                               horizontal_flip=True)\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    
class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    model = InceptionV1(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True)\n    # model.build((batch_size, 224, 224, 3))  # when using subclass model\n\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path)\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            aux1, aux2, output = 
model(images, training=True)\n            loss1 = loss_object(labels, aux1)\n            loss2 = loss_object(labels, aux2)\n            loss3 = loss_object(labels, output)\n            loss = loss1 * 0.3 + loss2 * 0.3 + loss3\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def val_step(images, labels):\n        _, _, output = model(images, training=False)\n        loss = loss_object(labels, output)\n\n        val_loss(loss)\n        val_accuracy(labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(range(total_train // batch_size), file=sys.stdout)\n        for step in train_bar:\n            images, labels = next(train_data_gen)\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        val_bar = tqdm(range(total_val // batch_size), file=sys.stdout)\n        for step in val_bar:\n            val_images, val_labels = next(val_data_gen)\n            val_step(val_images, val_labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                             
                                  epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/myInceptionV1.ckpt\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/batch_predict.py",
    "content": "import os\nimport json\nimport glob\n\nimport tensorflow as tf\nimport numpy as np\nfrom PIL import Image\n\nfrom model import resnet50\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n    num_classes = 5\n\n    _R_MEAN = 123.68\n    _G_MEAN = 116.78\n    _B_MEAN = 103.94\n\n    # load images\n    # 指向需要遍历预测的图像文件夹\n    imgs_root = \"/data/imgs\"\n    assert os.path.exists(imgs_root), f\"file: '{imgs_root}' dose not exist.\"\n    # 读取指定文件夹下所有jpg图像路径\n    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(\".jpg\")]\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), f\"file: '{json_path}' dose not exist.\"\n\n    json_file = open(json_path, \"r\")\n    class_indict = json.load(json_file)\n\n    # create model\n    feature = resnet50(num_classes=num_classes, include_top=False)\n    feature.trainable = False\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(1024, activation=\"relu\"),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(num_classes),\n                                 tf.keras.layers.Softmax()])\n\n    # load weights\n    weights_path = './save_weights/resNet_50.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    batch_size = 8  # 每次预测时将多少张图片打包成一个batch\n    for ids in range(0, len(img_path_list) // batch_size):\n        img_list = []\n        for img_path in img_path_list[ids * batch_size: (ids + 1) * batch_size]:\n            assert os.path.exists(img_path), f\"file: '{img_path}' dose not exist.\"\n            img = Image.open(img_path)\n            # resize image to 224x224\n            img = 
img.resize((im_width, im_height))\n\n            # convert to float32 and subtract the per-channel RGB means (no 0-1 scaling)\n            img = np.array(img).astype(np.float32)\n            img = img - [_R_MEAN, _G_MEAN, _B_MEAN]\n            img_list.append(img)\n\n        # batch images\n        # 将img_list列表中的所有图像打包成一个batch\n        batch_img = np.stack(img_list, axis=0)\n\n        # prediction\n        result = model.predict(batch_img)\n        predict_classes = np.argmax(result, axis=1)\n\n        for index, class_index in enumerate(predict_classes):\n            print_res = \"image: {}  class: {}   prob: {:.3}\".format(img_path_list[ids * batch_size + index],\n                                                                    class_indict[str(class_index)],\n                                                                    result[index][class_index])\n            print(print_res)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/class_indices.json",
    "content": "{\n    \"0\": \"daisy\",\n    \"1\": \"dandelion\",\n    \"2\": \"roses\",\n    \"3\": \"sunflowers\",\n    \"4\": \"tulips\"\n}"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/model.py",
    "content": "from tensorflow.keras import layers, Model, Sequential\n\n\nclass BasicBlock(layers.Layer):\n    expansion = 1\n\n    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):\n        super(BasicBlock, self).__init__(**kwargs)\n        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,\n                                   padding=\"SAME\", use_bias=False)\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)\n        # -----------------------------------------\n        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,\n                                   padding=\"SAME\", use_bias=False)\n        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)\n        # -----------------------------------------\n        self.downsample = downsample\n        self.relu = layers.ReLU()\n        self.add = layers.Add()\n\n    def call(self, inputs, training=False):\n        identity = inputs\n        if self.downsample is not None:\n            identity = self.downsample(inputs)\n\n        x = self.conv1(inputs)\n        x = self.bn1(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv2(x)\n        x = self.bn2(x, training=training)\n\n        x = self.add([identity, x])\n        x = self.relu(x)\n\n        return x\n\n\nclass Bottleneck(layers.Layer):\n    \"\"\"\n    注意：原论文中，在虚线残差结构的主分支上，第一个1x1卷积层的步距是2，第二个3x3卷积层步距是1。\n    但在pytorch官方实现过程中是第一个1x1卷积层的步距是1，第二个3x3卷积层步距是2，\n    这么做的好处是能够在top1上提升大概0.5%的准确率。\n    可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch\n    \"\"\"\n    expansion = 4\n\n    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):\n        super(Bottleneck, self).__init__(**kwargs)\n        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name=\"conv1\")\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv1/BatchNorm\")\n        # 
-----------------------------------------\n        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,\n                                   strides=strides, padding=\"SAME\", name=\"conv2\")\n        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv2/BatchNorm\")\n        # -----------------------------------------\n        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name=\"conv3\")\n        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv3/BatchNorm\")\n        # -----------------------------------------\n        self.relu = layers.ReLU()\n        self.downsample = downsample\n        self.add = layers.Add()\n\n    def call(self, inputs, training=False):\n        identity = inputs\n        if self.downsample is not None:\n            identity = self.downsample(inputs)\n\n        x = self.conv1(inputs)\n        x = self.bn1(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv2(x)\n        x = self.bn2(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv3(x)\n        x = self.bn3(x, training=training)\n\n        x = self.add([x, identity])\n        x = self.relu(x)\n\n        return x\n\n\ndef _make_layer(block, in_channel, channel, block_num, name, strides=1):\n    downsample = None\n    if strides != 1 or in_channel != channel * block.expansion:\n        downsample = Sequential([\n            layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,\n                          use_bias=False, name=\"conv1\"),\n            layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name=\"BatchNorm\")\n        ], name=\"shortcut\")\n\n    layers_list = []\n    layers_list.append(block(channel, downsample=downsample, strides=strides, name=\"unit_1\"))\n\n    for index in range(1, block_num):\n        layers_list.append(block(channel, name=\"unit_\" + str(index + 1)))\n\n    return 
Sequential(layers_list, name=name)\n\n\ndef _resnet(block, blocks_num, im_width=224, im_height=224, num_classes=1000, include_top=True):\n    # tensorflow中的tensor通道排序是NHWC\n    # (None, 224, 224, 3)\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")\n    x = layers.Conv2D(filters=64, kernel_size=7, strides=2,\n                      padding=\"SAME\", use_bias=False, name=\"conv1\")(input_image)\n    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=\"conv1/BatchNorm\")(x)\n    x = layers.ReLU()(x)\n    x = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\")(x)\n\n    x = _make_layer(block, x.shape[-1], 64, blocks_num[0], name=\"block1\")(x)\n    x = _make_layer(block, x.shape[-1], 128, blocks_num[1], strides=2, name=\"block2\")(x)\n    x = _make_layer(block, x.shape[-1], 256, blocks_num[2], strides=2, name=\"block3\")(x)\n    x = _make_layer(block, x.shape[-1], 512, blocks_num[3], strides=2, name=\"block4\")(x)\n\n    if include_top:\n        x = layers.GlobalAvgPool2D()(x)  # pool + flatten\n        x = layers.Dense(num_classes, name=\"logits\")(x)\n        predict = layers.Softmax()(x)\n    else:\n        predict = x\n\n    model = Model(inputs=input_image, outputs=predict)\n\n    return model\n\n\ndef resnet34(im_width=224, im_height=224, num_classes=1000, include_top=True):\n    return _resnet(BasicBlock, [3, 4, 6, 3], im_width, im_height, num_classes, include_top)\n\n\ndef resnet50(im_width=224, im_height=224, num_classes=1000, include_top=True):\n    return _resnet(Bottleneck, [3, 4, 6, 3], im_width, im_height, num_classes, include_top)\n\n\ndef resnet101(im_width=224, im_height=224, num_classes=1000, include_top=True):\n    return _resnet(Bottleneck, [3, 4, 23, 3], im_width, im_height, num_classes, include_top)\n\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/predict.py",
    "content": "import os\nimport json\nimport glob\n\nimport tensorflow as tf\nimport numpy as np\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nfrom model import resnet50\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n    num_classes = 5\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value to (0-1)\n    _R_MEAN = 123.68\n    _G_MEAN = 116.78\n    _B_MEAN = 103.94\n    img = np.array(img).astype(np.float32)\n    img = img - [_R_MEAN, _G_MEAN, _B_MEAN]\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    feature = resnet50(num_classes=num_classes, include_top=False)\n    feature.trainable = False\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(1024, activation=\"relu\"),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(num_classes),\n                                 tf.keras.layers.Softmax()])\n\n    # load weights\n    weights_path = './save_weights/resNet_50.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    # prediction\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: 
{:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/read_ckpt.py",
    "content": "\"\"\"\n可直接下载我转好的权重\n链接: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg  密码: u199\n\"\"\"\nimport tensorflow as tf\n\n\ndef rename_var(ckpt_path, new_ckpt_path, num_classes, except_list):\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        var_list = tf.train.list_variables(ckpt_path)\n        new_var_list = []\n\n        for var_name, shape in var_list:\n            print(var_name)\n            if var_name in except_list:\n                continue\n            var = tf.train.load_variable(ckpt_path, var_name)\n            new_var_name = var_name.replace('resnet_v1_50/', \"\")\n            new_var_name = new_var_name.replace(\"bottleneck_v1/\", \"\")\n            new_var_name = new_var_name.replace(\"shortcut/weights\", \"shortcut/conv1/kernel\")\n            new_var_name = new_var_name.replace(\"weights\", \"kernel\")\n            new_var_name = new_var_name.replace(\"biases\", \"bias\")\n            re_var = tf.Variable(var, name=new_var_name)\n            new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([2048, num_classes]), name=\"logits/kernel\")\n        new_var_list.append(re_var)\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"logits/bias\")\n        new_var_list.append(re_var)\n        saver = tf.compat.v1.train.Saver(new_var_list)\n        sess.run(tf.compat.v1.global_variables_initializer())\n        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)\n\n\ndef main():\n    except_list = ['global_step', 'resnet_v1_50/mean_rgb', 'resnet_v1_50/logits/biases', 'resnet_v1_50/logits/weights']\n    ckpt_path = './resnet_v1_50.ckpt'\n    new_ckpt_path = './pretrain_weights.ckpt'\n    num_classes = 5\n    rename_var(ckpt_path, new_ckpt_path, num_classes, except_list)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/read_h5.py",
    "content": "import h5py\n\nf = h5py.File('./save_weights/resNet_1.h5', 'r')\nfor root_name, g in f.items():\n    print(root_name)\n    for _, weights_dirs in g.attrs.items():\n        for i in weights_dirs:\n            name = root_name + \"/\" + str(i, encoding=\"utf-8\")\n            data = f[name]\n            print(data.value)\n\n\n\n\n\n\n\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/subclassed_model.py",
    "content": "from tensorflow.keras import layers, Model, Sequential\n\n\nclass BasicBlock(layers.Layer):\n    expansion = 1\n\n    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):\n        super(BasicBlock, self).__init__(**kwargs)\n        self.conv1 = layers.Conv2D(out_channel, kernel_size=3, strides=strides,\n                                   padding=\"SAME\", use_bias=False)\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)\n        # -----------------------------------------\n        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, strides=1,\n                                   padding=\"SAME\", use_bias=False)\n        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)\n        # -----------------------------------------\n        self.downsample = downsample\n        self.relu = layers.ReLU()\n        self.add = layers.Add()\n\n    def call(self, inputs, training=False, **kwargs):\n        identity = inputs\n        if self.downsample is not None:\n            identity = self.downsample(inputs)\n\n        x = self.conv1(inputs)\n        x = self.bn1(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv2(x)\n        x = self.bn2(x, training=training)\n\n        x = self.add([identity, x])\n        x = self.relu(x)\n\n        return x\n\n\nclass Bottleneck(layers.Layer):\n    \"\"\"\n    注意：原论文中，在虚线残差结构的主分支上，第一个1x1卷积层的步距是2，第二个3x3卷积层步距是1。\n    但在pytorch官方实现过程中是第一个1x1卷积层的步距是1，第二个3x3卷积层步距是2，\n    这么做的好处是能够在top1上提升大概0.5%的准确率。\n    可参考Resnet v1.5 https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch\n    \"\"\"\n    expansion = 4\n\n    def __init__(self, out_channel, strides=1, downsample=None, **kwargs):\n        super(Bottleneck, self).__init__(**kwargs)\n        self.conv1 = layers.Conv2D(out_channel, kernel_size=1, use_bias=False, name=\"conv1\")\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, 
name=\"conv1/BatchNorm\")\n        # -----------------------------------------\n        self.conv2 = layers.Conv2D(out_channel, kernel_size=3, use_bias=False,\n                                   strides=strides, padding=\"SAME\", name=\"conv2\")\n        self.bn2 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name=\"conv2/BatchNorm\")\n        # -----------------------------------------\n        self.conv3 = layers.Conv2D(out_channel * self.expansion, kernel_size=1, use_bias=False, name=\"conv3\")\n        self.bn3 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name=\"conv3/BatchNorm\")\n        # -----------------------------------------\n        self.relu = layers.ReLU()\n        self.downsample = downsample\n        self.add = layers.Add()\n\n    def call(self, inputs, training=False, **kwargs):\n        identity = inputs\n        if self.downsample is not None:\n            identity = self.downsample(inputs)\n\n        x = self.conv1(inputs)\n        x = self.bn1(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv2(x)\n        x = self.bn2(x, training=training)\n        x = self.relu(x)\n\n        x = self.conv3(x)\n        x = self.bn3(x, training=training)\n\n        x = self.add([x, identity])\n        x = self.relu(x)\n\n        return x\n\n\nclass ResNet(Model):\n    def __init__(self, block, blocks_num, num_classes=1000, include_top=True, **kwargs):\n        super(ResNet, self).__init__(**kwargs)\n        self.include_top = include_top\n        self.conv1 = layers.Conv2D(filters=64, kernel_size=7, strides=2, padding=\"SAME\",\n                                   use_bias=False, name=\"conv1\")\n        self.bn1 = layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name=\"conv1/BatchNorm\")\n        self.relu1 = layers.ReLU(name=\"relu1\")\n        self.maxpool1 = layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\", name=\"maxpool1\")\n\n        self.block1 = self._make_layer(block, True, 64, 
blocks_num[0], name=\"block1\")\n        self.block2 = self._make_layer(block, False, 128, blocks_num[1], strides=2, name=\"block2\")\n        self.block3 = self._make_layer(block, False, 256, blocks_num[2], strides=2, name=\"block3\")\n        self.block4 = self._make_layer(block, False, 512, blocks_num[3], strides=2, name=\"block4\")\n\n        if self.include_top:\n            self.avgpool = layers.GlobalAvgPool2D(name=\"avgpool1\")\n            self.fc = layers.Dense(num_classes, name=\"logits\")\n            self.softmax = layers.Softmax()\n\n    def call(self, inputs, training=False, **kwargs):\n        x = self.conv1(inputs)\n        x = self.bn1(x, training=training)\n        x = self.relu1(x)\n        x = self.maxpool1(x)\n\n        x = self.block1(x, training=training)\n        x = self.block2(x, training=training)\n        x = self.block3(x, training=training)\n        x = self.block4(x, training=training)\n\n        if self.include_top:\n            x = self.avgpool(x)\n            x = self.fc(x)\n            x = self.softmax(x)\n\n        return x\n\n    def _make_layer(self, block, first_block, channel, block_num, name=None, strides=1):\n        downsample = None\n        if strides != 1 or first_block is True:\n            downsample = Sequential([\n                layers.Conv2D(channel * block.expansion, kernel_size=1, strides=strides,\n                              use_bias=False, name=\"conv1\"),\n                layers.BatchNormalization(momentum=0.9, epsilon=1.001e-5, name=\"BatchNorm\")\n            ], name=\"shortcut\")\n\n        layers_list = []\n        layers_list.append(block(channel, downsample=downsample, strides=strides, name=\"unit_1\"))\n\n        for index in range(1, block_num):\n            layers_list.append(block(channel, name=\"unit_\" + str(index + 1)))\n\n        return Sequential(layers_list, name=name)\n\n\ndef resnet34(num_classes=1000, include_top=True):\n    block = BasicBlock\n    block_num = [3, 4, 6, 3]\n    return 
ResNet(block, block_num, num_classes, include_top)\n\n\ndef resnet101(num_classes=1000, include_top=True):\n    block = Bottleneck\n    blocks_num = [3, 4, 23, 3]\n    return ResNet(block, blocks_num, num_classes, include_top)\n\n\n\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/train.py",
    "content": "import os\nimport sys\nimport glob\nimport json\n\nimport tensorflow as tf\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nfrom tqdm import tqdm\n\nfrom model import resnet50\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 20\n    num_classes = 5\n\n    _R_MEAN = 123.68\n    _G_MEAN = 116.78\n    _B_MEAN = 103.94\n\n    def pre_function(img):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img - [_R_MEAN, _G_MEAN, _B_MEAN]\n\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True,\n                                               preprocessing_function=pre_function)\n\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in 
class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    # img, _ = next(train_data_gen)\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    feature = resnet50(num_classes=5, include_top=False)\n    # feature.build((None, 224, 224, 3))  # when using subclass model\n\n    # 直接下载我转好的权重\n    # download weights 链接: https://pan.baidu.com/s/1tLe9ahTMIwQAX7do_S59Zg  密码: u199\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    feature.load_weights(pre_weights_path)\n    feature.trainable = False\n    feature.summary()\n\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(1024, activation=\"relu\"),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(num_classes),\n                                 tf.keras.layers.Softmax()])\n    # model.build((None, 224, 224, 3))\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = 
tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            output = model(images, training=True)\n            loss = loss_object(labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def val_step(images, labels):\n        output = model(images, training=False)\n        loss = loss_object(labels, output)\n\n        val_loss(loss)\n        val_accuracy(labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(range(total_train // batch_size), file=sys.stdout)\n        for step in train_bar:\n            images, labels = next(train_data_gen)\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        
val_bar = tqdm(range(total_val // batch_size), file=sys.stdout)\n        for step in val_bar:\n            test_images, test_labels = next(val_data_gen)\n            val_step(test_images, test_labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/resNet_50.ckpt\", save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test5_resnet/trainGPU.py",
    "content": "import matplotlib.pyplot as plt\nfrom model import resnet50\nimport tensorflow as tf\nimport json\nimport os\nimport time\nimport glob\nimport random\nos.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n\ndef main():\n    gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n    if gpus:\n        try:\n            for gpu in gpus:\n                tf.config.experimental.set_memory_growth(gpu, True)\n        except RuntimeError as e:\n            print(e)\n            exit(-1)\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n\n    _R_MEAN = 123.68\n    _G_MEAN = 116.78\n    _B_MEAN = 103.94\n\n    batch_size = 32\n    epochs = 30\n\n    # class dict\n    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]\n    class_num = len(data_class)\n    class_dict = dict((value, index) for index, value in enumerate(data_class))\n\n    # reverse value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_dict.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # load train images list\n    random.seed(0)\n    train_image_list = glob.glob(train_dir+\"/*/*.jpg\")\n    random.shuffle(train_image_list)\n    train_num = 
len(train_image_list)\n    assert train_num > 0, \"cannot find any .jpg file in {}\".format(train_dir)\n    train_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]\n\n    # load validation images list\n    val_image_list = glob.glob(validation_dir+\"/*/*.jpg\")\n    random.shuffle(val_image_list)\n    val_num = len(val_image_list)\n    assert val_num > 0, \"cannot find any .jpg file in {}\".format(validation_dir)\n    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    def process_train_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        # image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.random_flip_left_right(image)\n        # image = (image - 0.5) / 0.5\n        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]\n        return image, label\n\n    def process_val_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        # image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        # image = (image - 0.5) / 0.5\n        image = image - [_R_MEAN, _G_MEAN, _B_MEAN]\n        return image, label\n\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    # load train dataset\n    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))\n    train_dataset = train_dataset.shuffle(buffer_size=train_num)\\\n                        
         .map(process_train_img, num_parallel_calls=AUTOTUNE)\\\n                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)\n\n    # load train dataset\n    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, val_label_list))\n    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\\\n                             .repeat().batch(batch_size)\n\n    # 实例化模型\n    feature = resnet50(num_classes=5, include_top=False)\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path + \"*\")), \"cannot find {}\".format(pre_weights_path)\n    feature.load_weights(pre_weights_path)\n    feature.trainable = False\n\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(1024, activation=\"relu\"),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(5),\n                                 tf.keras.layers.Softmax()])\n\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            output = model(images, training=True)\n            loss = loss_object(labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, 
model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def test_step(images, labels):\n        output = model(images, training=False)\n        t_loss = loss_object(labels, output)\n\n        test_loss(t_loss)\n        test_accuracy(labels, output)\n\n    best_test_loss = float('inf')\n    train_step_num = train_num // batch_size\n    val_step_num = val_num // batch_size\n    for epoch in range(1, epochs+1):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        test_accuracy.reset_states()     # clear history info\n\n        t1 = time.perf_counter()\n        for index, (images, labels) in enumerate(train_dataset):\n            train_step(images, labels)\n            if index+1 == train_step_num:\n                break\n        print(time.perf_counter()-t1)\n\n        for index, (images, labels) in enumerate(val_dataset):\n            test_step(images, labels)\n            if index+1 == val_step_num:\n                break\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        print(template.format(epoch,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                              test_accuracy.result() * 100))\n        if test_loss.result() < best_test_loss:\n            model.save_weights(\"./save_weights/myResNet.ckpt\", save_format='tf')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/model_v2.py",
    "content": "from tensorflow.keras import layers, Model, Sequential\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\nclass ConvBNReLU(layers.Layer):\n    def __init__(self, out_channel, kernel_size=3, stride=1, **kwargs):\n        super(ConvBNReLU, self).__init__(**kwargs)\n        self.conv = layers.Conv2D(filters=out_channel, kernel_size=kernel_size,\n                                  strides=stride, padding='SAME', use_bias=False, name='Conv2d')\n        self.bn = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='BatchNorm')\n        self.activation = layers.ReLU(max_value=6.0)\n\n    def call(self, inputs, training=False):\n        x = self.conv(inputs)\n        x = self.bn(x, training=training)\n        x = self.activation(x)\n        return x\n\n\nclass InvertedResidual(layers.Layer):\n    def __init__(self, in_channel, out_channel, stride, expand_ratio, **kwargs):\n        super(InvertedResidual, self).__init__(**kwargs)\n        self.hidden_channel = in_channel * expand_ratio\n        self.use_shortcut = stride == 1 and in_channel == out_channel\n\n        layer_list = []\n        if expand_ratio != 1:\n            # 1x1 pointwise conv\n            layer_list.append(ConvBNReLU(out_channel=self.hidden_channel, kernel_size=1, name='expand'))\n\n        layer_list.extend([\n            # 3x3 depthwise conv\n            layers.DepthwiseConv2D(kernel_size=3, padding='SAME', strides=stride,\n                     
              use_bias=False, name='depthwise'),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='depthwise/BatchNorm'),\n            layers.ReLU(max_value=6.0),\n            # 1x1 pointwise conv(linear)\n            layers.Conv2D(filters=out_channel, kernel_size=1, strides=1,\n                          padding='SAME', use_bias=False, name='project'),\n            layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name='project/BatchNorm')\n        ])\n        self.main_branch = Sequential(layer_list, name='expanded_conv')\n\n    def call(self, inputs, training=False, **kwargs):\n        if self.use_shortcut:\n            return inputs + self.main_branch(inputs, training=training)\n        else:\n            return self.main_branch(inputs, training=training)\n\n\ndef MobileNetV2(im_height=224,\n                im_width=224,\n                num_classes=1000,\n                alpha=1.0,\n                round_nearest=8,\n                include_top=True):\n    block = InvertedResidual\n    input_channel = _make_divisible(32 * alpha, round_nearest)\n    last_channel = _make_divisible(1280 * alpha, round_nearest)\n    inverted_residual_setting = [\n        # t, c, n, s\n        [1, 16, 1, 1],\n        [6, 24, 2, 2],\n        [6, 32, 3, 2],\n        [6, 64, 4, 2],\n        [6, 96, 3, 1],\n        [6, 160, 3, 2],\n        [6, 320, 1, 1],\n    ]\n\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype='float32')\n    # conv1\n    x = ConvBNReLU(input_channel, stride=2, name='Conv')(input_image)\n    # building inverted residual residual blockes\n    for idx, (t, c, n, s) in enumerate(inverted_residual_setting):\n        output_channel = _make_divisible(c * alpha, round_nearest)\n        for i in range(n):\n            stride = s if i == 0 else 1\n            x = block(x.shape[-1],\n                      output_channel,\n                      stride,\n                      expand_ratio=t)(x)\n    # building last several layers\n   
 x = ConvBNReLU(last_channel, kernel_size=1, name='Conv_1')(x)\n\n    if include_top is True:\n        # building classifier\n        x = layers.GlobalAveragePooling2D()(x)  # pool + flatten\n        x = layers.Dropout(0.2)(x)\n        output = layers.Dense(num_classes, name='Logits')(x)\n    else:\n        output = x\n\n    model = Model(inputs=input_image, outputs=output)\n    return model\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/model_v3.py",
    "content": "from typing import Union\nfrom functools import partial\nfrom tensorflow.keras import layers, Model\n\n\ndef _make_divisible(ch, divisor=8, min_ch=None):\n    \"\"\"\n    This function is taken from the original tf repo.\n    It ensures that all layers have a channel number that is divisible by 8\n    It can be seen here:\n    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py\n    \"\"\"\n    if min_ch is None:\n        min_ch = divisor\n    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than 10%.\n    if new_ch < 0.9 * ch:\n        new_ch += divisor\n    return new_ch\n\n\ndef correct_pad(input_size: Union[int, tuple], kernel_size: int):\n    \"\"\"Returns a tuple for zero-padding for 2D convolution with downsampling.\n\n    Arguments:\n      input_size: Input tensor size.\n      kernel_size: An integer or tuple/list of 2 integers.\n\n    Returns:\n      A tuple.\n    \"\"\"\n\n    if isinstance(input_size, int):\n        input_size = (input_size, input_size)\n\n    kernel_size = (kernel_size, kernel_size)\n\n    adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)\n    correct = (kernel_size[0] // 2, kernel_size[1] // 2)\n    return ((correct[0] - adjust[0], correct[0]),\n            (correct[1] - adjust[1], correct[1]))\n\n\nclass HardSigmoid(layers.Layer):\n    def __init__(self, **kwargs):\n        super(HardSigmoid, self).__init__(**kwargs)\n        self.relu6 = layers.ReLU(6.)\n\n    def call(self, inputs, **kwargs):\n        x = self.relu6(inputs + 3) * (1. 
/ 6)\n        return x\n\n\nclass HardSwish(layers.Layer):\n    def __init__(self, **kwargs):\n        super(HardSwish, self).__init__(**kwargs)\n        self.hard_sigmoid = HardSigmoid()\n\n    def call(self, inputs, **kwargs):\n        x = self.hard_sigmoid(inputs) * inputs\n        return x\n\n\ndef _se_block(inputs, filters, prefix, se_ratio=1 / 4.):\n    # [batch, height, width, channel] -> [batch, channel]\n    x = layers.GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)\n\n    # Target shape. Tuple of integers, does not include the samples dimension (batch size).\n    # [batch, channel] -> [batch, 1, 1, channel]\n    x = layers.Reshape((1, 1, filters))(x)\n\n    # fc1\n    x = layers.Conv2D(filters=_make_divisible(filters * se_ratio),\n                      kernel_size=1,\n                      padding='same',\n                      name=prefix + 'squeeze_excite/Conv')(x)\n    x = layers.ReLU(name=prefix + 'squeeze_excite/Relu')(x)\n\n    # fc2\n    x = layers.Conv2D(filters=filters,\n                      kernel_size=1,\n                      padding='same',\n                      name=prefix + 'squeeze_excite/Conv_1')(x)\n    x = HardSigmoid(name=prefix + 'squeeze_excite/HardSigmoid')(x)\n\n    x = layers.Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])\n    return x\n\n\ndef _inverted_res_block(x,\n                        input_c: int,      # input channel\n                        kernel_size: int,  # kennel size\n                        exp_c: int,        # expanded channel\n                        out_c: int,        # out channel\n                        use_se: bool,      # whether using SE\n                        activation: str,   # RE or HS\n                        stride: int,\n                        block_id: int,\n                        alpha: float = 1.0):\n\n    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)\n\n    input_c = _make_divisible(input_c * alpha)\n    exp_c = 
_make_divisible(exp_c * alpha)\n    out_c = _make_divisible(out_c * alpha)\n\n    act = layers.ReLU if activation == \"RE\" else HardSwish\n\n    shortcut = x\n    prefix = 'expanded_conv/'\n    if block_id:\n        # expand channel\n        prefix = 'expanded_conv_{}/'.format(block_id)\n        x = layers.Conv2D(filters=exp_c,\n                          kernel_size=1,\n                          padding='same',\n                          use_bias=False,\n                          name=prefix + 'expand')(x)\n        x = bn(name=prefix + 'expand/BatchNorm')(x)\n        x = act(name=prefix + 'expand/' + act.__name__)(x)\n\n    if stride == 2:\n        input_size = (x.shape[1], x.shape[2])  # height, width\n        x = layers.ZeroPadding2D(padding=correct_pad(input_size, kernel_size),\n                                 name=prefix + 'depthwise/pad')(x)\n\n    x = layers.DepthwiseConv2D(kernel_size=kernel_size,\n                               strides=stride,\n                               padding='same' if stride == 1 else 'valid',\n                               use_bias=False,\n                               name=prefix + 'depthwise')(x)\n    x = bn(name=prefix + 'depthwise/BatchNorm')(x)\n    x = act(name=prefix + 'depthwise/' + act.__name__)(x)\n\n    if use_se:\n        x = _se_block(x, filters=exp_c, prefix=prefix)\n\n    x = layers.Conv2D(filters=out_c,\n                      kernel_size=1,\n                      padding='same',\n                      use_bias=False,\n                      name=prefix + 'project')(x)\n    x = bn(name=prefix + 'project/BatchNorm')(x)\n\n    if stride == 1 and input_c == out_c:\n        x = layers.Add(name=prefix + 'Add')([shortcut, x])\n\n    return x\n\n\ndef mobilenet_v3_large(input_shape=(224, 224, 3),\n                       num_classes=1000,\n                       alpha=1.0,\n                       include_top=True):\n    \"\"\"\n    download weights url:\n    链接: https://pan.baidu.com/s/13uJznKeqHkjUp72G_gxe8Q  密码: 8quu\n  
  \"\"\"\n    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)\n    img_input = layers.Input(shape=input_shape)\n\n    x = layers.Conv2D(filters=16,\n                      kernel_size=3,\n                      strides=(2, 2),\n                      padding='same',\n                      use_bias=False,\n                      name=\"Conv\")(img_input)\n    x = bn(name=\"Conv/BatchNorm\")(x)\n    x = HardSwish(name=\"Conv/HardSwish\")(x)\n\n    inverted_cnf = partial(_inverted_res_block, alpha=alpha)\n    # input, input_c, k_size, expand_c, use_se, activation, stride, block_id\n    x = inverted_cnf(x, 16, 3, 16, 16, False, \"RE\", 1, 0)\n    x = inverted_cnf(x, 16, 3, 64, 24, False, \"RE\", 2, 1)\n    x = inverted_cnf(x, 24, 3, 72, 24, False, \"RE\", 1, 2)\n    x = inverted_cnf(x, 24, 5, 72, 40, True, \"RE\", 2, 3)\n    x = inverted_cnf(x, 40, 5, 120, 40, True, \"RE\", 1, 4)\n    x = inverted_cnf(x, 40, 5, 120, 40, True, \"RE\", 1, 5)\n    x = inverted_cnf(x, 40, 3, 240, 80, False, \"HS\", 2, 6)\n    x = inverted_cnf(x, 80, 3, 200, 80, False, \"HS\", 1, 7)\n    x = inverted_cnf(x, 80, 3, 184, 80, False, \"HS\", 1, 8)\n    x = inverted_cnf(x, 80, 3, 184, 80, False, \"HS\", 1, 9)\n    x = inverted_cnf(x, 80, 3, 480, 112, True, \"HS\", 1, 10)\n    x = inverted_cnf(x, 112, 3, 672, 112, True, \"HS\", 1, 11)\n    x = inverted_cnf(x, 112, 5, 672, 160, True, \"HS\", 2, 12)\n    x = inverted_cnf(x, 160, 5, 960, 160, True, \"HS\", 1, 13)\n    x = inverted_cnf(x, 160, 5, 960, 160, True, \"HS\", 1, 14)\n\n    last_c = _make_divisible(160 * 6 * alpha)\n    last_point_c = _make_divisible(1280 * alpha)\n\n    x = layers.Conv2D(filters=last_c,\n                      kernel_size=1,\n                      padding='same',\n                      use_bias=False,\n                      name=\"Conv_1\")(x)\n    x = bn(name=\"Conv_1/BatchNorm\")(x)\n    x = HardSwish(name=\"Conv_1/HardSwish\")(x)\n\n    if include_top is True:\n        x = 
layers.GlobalAveragePooling2D()(x)\n        x = layers.Reshape((1, 1, last_c))(x)\n\n        # fc1\n        x = layers.Conv2D(filters=last_point_c,\n                          kernel_size=1,\n                          padding='same',\n                          name=\"Conv_2\")(x)\n        x = HardSwish(name=\"Conv_2/HardSwish\")(x)\n\n        # fc2\n        x = layers.Conv2D(filters=num_classes,\n                          kernel_size=1,\n                          padding='same',\n                          name='Logits/Conv2d_1c_1x1')(x)\n        x = layers.Flatten()(x)\n        x = layers.Softmax(name=\"Predictions\")(x)\n\n    model = Model(img_input, x, name=\"MobilenetV3large\")\n\n    return model\n\n\ndef mobilenet_v3_small(input_shape=(224, 224, 3),\n                       num_classes=1000,\n                       alpha=1.0,\n                       include_top=True):\n    \"\"\"\n    download weights url:\n    链接: https://pan.baidu.com/s/1vrQ_6HdDTHL1UUAN6nSEcw  密码: rrf0\n    \"\"\"\n    bn = partial(layers.BatchNormalization, epsilon=0.001, momentum=0.99)\n    img_input = layers.Input(shape=input_shape)\n\n    x = layers.Conv2D(filters=16,\n                      kernel_size=3,\n                      strides=(2, 2),\n                      padding='same',\n                      use_bias=False,\n                      name=\"Conv\")(img_input)\n    x = bn(name=\"Conv/BatchNorm\")(x)\n    x = HardSwish(name=\"Conv/HardSwish\")(x)\n\n    inverted_cnf = partial(_inverted_res_block, alpha=alpha)\n    # input, input_c, k_size, expand_c, use_se, activation, stride, block_id\n    x = inverted_cnf(x, 16, 3, 16, 16, True, \"RE\", 2, 0)\n    x = inverted_cnf(x, 16, 3, 72, 24, False, \"RE\", 2, 1)\n    x = inverted_cnf(x, 24, 3, 88, 24, False, \"RE\", 1, 2)\n    x = inverted_cnf(x, 24, 5, 96, 40, True, \"HS\", 2, 3)\n    x = inverted_cnf(x, 40, 5, 240, 40, True, \"HS\", 1, 4)\n    x = inverted_cnf(x, 40, 5, 240, 40, True, \"HS\", 1, 5)\n    x = inverted_cnf(x, 40, 5, 120, 
48, True, \"HS\", 1, 6)\n    x = inverted_cnf(x, 48, 5, 144, 48, True, \"HS\", 1, 7)\n    x = inverted_cnf(x, 48, 5, 288, 96, True, \"HS\", 2, 8)\n    x = inverted_cnf(x, 96, 5, 576, 96, True, \"HS\", 1, 9)\n    x = inverted_cnf(x, 96, 5, 576, 96, True, \"HS\", 1, 10)\n\n    last_c = _make_divisible(96 * 6 * alpha)\n    last_point_c = _make_divisible(1024 * alpha)\n\n    x = layers.Conv2D(filters=last_c,\n                      kernel_size=1,\n                      padding='same',\n                      use_bias=False,\n                      name=\"Conv_1\")(x)\n    x = bn(name=\"Conv_1/BatchNorm\")(x)\n    x = HardSwish(name=\"Conv_1/HardSwish\")(x)\n\n    if include_top is True:\n        x = layers.GlobalAveragePooling2D()(x)\n        x = layers.Reshape((1, 1, last_c))(x)\n\n        # fc1\n        x = layers.Conv2D(filters=last_point_c,\n                          kernel_size=1,\n                          padding='same',\n                          name=\"Conv_2\")(x)\n        x = HardSwish(name=\"Conv_2/HardSwish\")(x)\n\n        # fc2\n        x = layers.Conv2D(filters=num_classes,\n                          kernel_size=1,\n                          padding='same',\n                          name='Logits/Conv2d_1c_1x1')(x)\n        x = layers.Flatten()(x)\n        x = layers.Softmax(name=\"Predictions\")(x)\n\n    model = Model(img_input, x, name=\"MobilenetV3large\")\n\n    return model\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport matplotlib.pyplot as plt\nimport tensorflow as tf\n\nfrom model_v2 import MobileNetV2\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n    num_classes = 5\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value to (-1,1)\n    img = np.array(img).astype(np.float32)\n    img = ((img / 255.) - 0.5) * 2.0\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    feature = MobileNetV2(include_top=False)\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(num_classes),\n                                 tf.keras.layers.Softmax()])\n    weights_path = './save_weights/resMobileNetV2.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  
result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/read_ckpt.py",
    "content": "\"\"\"\n建议直接下载使用我转好的权重\n链接: https://pan.baidu.com/s/1YgFoIKHqooMrTQg_IqI2hA  密码: 2qht\n\"\"\"\nimport tensorflow as tf\n\n\ndef rename_var(ckpt_path, new_ckpt_path, num_classes, except_list):\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        var_list = tf.train.list_variables(ckpt_path)\n        new_var_list = []\n\n        for var_name, shape in var_list:\n            # print(var_name)\n            if var_name in except_list:\n                continue\n            if \"RMSProp\" in var_name or \"Exponential\" in var_name:\n                continue\n            var = tf.train.load_variable(ckpt_path, var_name)\n            new_var_name = var_name.replace('MobilenetV2/', \"\")\n            new_var_name = new_var_name.replace(\"/expand/weights\", \"/expand/Conv2d/weights\")\n            new_var_name = new_var_name.replace(\"Conv/weights\", \"Conv/Conv2d/kernel\")\n            new_var_name = new_var_name.replace(\"Conv_1/weights\", \"Conv_1/Conv2d/kernel\")\n            new_var_name = new_var_name.replace(\"weights\", \"kernel\")\n            new_var_name = new_var_name.replace(\"biases\", \"bias\")\n\n            first_word = new_var_name.split('/')[0]\n            if \"expanded_conv\" in first_word:\n                last_word = first_word.split('expanded_conv')[-1]\n                if len(last_word) > 0:\n                    new_word = \"inverted_residual\" + last_word + \"/expanded_conv/\"\n                else:\n                    new_word = \"inverted_residual/expanded_conv/\"\n                new_var_name = new_word + new_var_name.split('/', maxsplit=1)[-1]\n            print(new_var_name)\n            re_var = tf.Variable(var, name=new_var_name)\n            new_var_list.append(re_var)\n\n        re_var = tf.Variable(tf.keras.initializers.he_uniform()([1280, num_classes]), name=\"Logits/kernel\")\n        new_var_list.append(re_var)\n        re_var = 
tf.Variable(tf.keras.initializers.he_uniform()([num_classes]), name=\"Logits/bias\")\n\n        new_var_list.append(re_var)\n        tf.keras.initializers.he_uniform()\n        saver = tf.compat.v1.train.Saver(new_var_list)\n        sess.run(tf.compat.v1.global_variables_initializer())\n        saver.save(sess, save_path=new_ckpt_path, write_meta_graph=False, write_state=False)\n\n\ndef main():\n    except_list = ['global_step', 'MobilenetV2/Logits/Conv2d_1c_1x1/biases', 'MobilenetV2/Logits/Conv2d_1c_1x1/weights']\n    ckpt_path = './pretrain_model/mobilenet_v2_1.0_224.ckpt'\n    new_ckpt_path = './pretrain_weights.ckpt'\n    num_classes = 5\n    rename_var(ckpt_path, new_ckpt_path, num_classes, except_list)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/trainGPU_mobilenet_v2.py",
    "content": "from model_v2 import MobileNetV2\nimport tensorflow as tf\nimport json\nimport os\nimport time\nimport glob\nimport random\nos.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n\ndef main():\n    gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n    if gpus:\n        try:\n            for gpu in gpus:\n                tf.config.experimental.set_memory_growth(gpu, True)\n        except RuntimeError as e:\n            print(e)\n            exit(-1)\n\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    # create direction for saving weights\n    if not os.path.exists(\"save_weights\"):\n        os.makedirs(\"save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 32\n    epochs = 30\n\n    # class dict\n    data_class = [cla for cla in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, cla))]\n    class_num = len(data_class)\n    class_dict = dict((value, index) for index, value in enumerate(data_class))\n\n    # reverse value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_dict.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    # load train images list\n    train_image_list = glob.glob(train_dir+\"/*/*.jpg\")\n    random.shuffle(train_image_list)\n    train_num = len(train_image_list)\n    assert train_num > 0, \"cannot find any .jpg file in {}\".format(train_dir)\n    train_label_list = 
[class_dict[path.split(os.path.sep)[-2]] for path in train_image_list]\n\n    # load validation images list\n    val_image_list = glob.glob(validation_dir+\"/*/*.jpg\")\n    random.shuffle(val_image_list)\n    val_num = len(val_image_list)\n    assert val_num > 0, \"cannot find any .jpg file in {}\".format(validation_dir)\n    val_label_list = [class_dict[path.split(os.path.sep)[-2]] for path in val_image_list]\n\n    print(\"using {} images for training, {} images for validation.\".format(train_num,\n                                                                           val_num))\n\n    def process_train_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.random_flip_left_right(image)\n        # image = (image - 0.5) / 0.5\n        image = (image - 0.5) * 2.0\n        return image, label\n\n    def process_val_img(img_path, label):\n        label = tf.one_hot(label, depth=class_num)\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.image.resize(image, [im_height, im_width])\n        # image = (image - 0.5) / 0.5\n        image = (image - 0.5) * 2.0\n        return image, label\n\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    # load train dataset\n    train_dataset = tf.data.Dataset.from_tensor_slices((train_image_list, train_label_list))\n    train_dataset = train_dataset.shuffle(buffer_size=train_num)\\\n                                 .map(process_train_img, num_parallel_calls=AUTOTUNE)\\\n                                 .repeat().batch(batch_size).prefetch(AUTOTUNE)\n\n    # load train dataset\n    val_dataset = tf.data.Dataset.from_tensor_slices((val_image_list, 
val_label_list))\n    val_dataset = val_dataset.map(process_val_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)\\\n                             .repeat().batch(batch_size)\n\n    # 实例化模型\n    model = MobileNetV2(num_classes=5)\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path + \"*\")), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path)\n    for layer_t in model.layers[:-1]:\n        layer_t.trainable = False\n\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    test_loss = tf.keras.metrics.Mean(name='test_loss')\n    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            output = model(images, training=True)\n            loss = loss_object(labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def test_step(images, labels):\n        output = model(images, training=False)\n        t_loss = loss_object(labels, output)\n\n        test_loss(t_loss)\n        test_accuracy(labels, output)\n\n    best_test_loss = float('inf')\n    train_step_num = train_num // batch_size\n    val_step_num = val_num // batch_size\n    for epoch in range(1, epochs+1):\n        train_loss.reset_states()        # clear history info\n        train_accuracy.reset_states()    # clear history info\n        test_loss.reset_states()         # clear history info\n        
test_accuracy.reset_states()     # clear history info\n\n        t1 = time.perf_counter()\n        for index, (images, labels) in enumerate(train_dataset):\n            train_step(images, labels)\n            if index+1 == train_step_num:\n                break\n        print(time.perf_counter()-t1)\n\n        for index, (images, labels) in enumerate(val_dataset):\n            test_step(images, labels)\n            if index+1 == val_step_num:\n                break\n\n        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'\n        print(template.format(epoch,\n                              train_loss.result(),\n                              train_accuracy.result() * 100,\n                              test_loss.result(),\n                              test_accuracy.result() * 100))\n        if test_loss.result() < best_test_loss:\n            model.save_weights(\"./save_weights/myMobileNet.ckpt\".format(epoch), save_format='tf')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/train_mobilenet_v2.py",
    "content": "import os\nimport sys\nimport glob\nimport json\n\nimport tensorflow as tf\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nfrom tqdm import tqdm\n\nfrom model_v2 import MobileNetV2\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 20\n    num_classes = 5\n\n    def pre_function(img):\n        # img = im.open('test.jpg')\n        # img = np.array(img).astype(np.float32)\n        img = img / 255.\n        img = (img - 0.5) * 2.0\n        return img\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True,\n                                               preprocessing_function=pre_function)\n\n    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    
json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n    # img, _ = next(train_data_gen)\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    # create model except fc layer\n    feature = MobileNetV2(include_top=False)\n    # download weights 链接: https://pan.baidu.com/s/1YgFoIKHqooMrTQg_IqI2hA  密码: 2qht\n    pre_weights_path = './pretrain_weights.ckpt'\n    assert len(glob.glob(pre_weights_path+\"*\")), \"cannot find {}\".format(pre_weights_path)\n    feature.load_weights(pre_weights_path)\n    feature.trainable = False\n    feature.summary()\n\n    # add last fc layer\n    model = tf.keras.Sequential([feature,\n                                 tf.keras.layers.GlobalAvgPool2D(),\n                                 tf.keras.layers.Dropout(rate=0.5),\n                                 tf.keras.layers.Dense(num_classes),\n                                 tf.keras.layers.Softmax()])\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    val_loss = 
tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            output = model(images, training=True)\n            loss = loss_object(labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def val_step(images, labels):\n        output = model(images, training=False)\n        loss = loss_object(labels, output)\n\n        val_loss(loss)\n        val_accuracy(labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(range(total_train // batch_size), file=sys.stdout)\n        for step in train_bar:\n            images, labels = next(train_data_gen)\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        val_bar = tqdm(range(total_val // batch_size), file=sys.stdout)\n        for step in val_bar:\n            val_images, val_labels = next(val_data_gen)\n            val_step(val_images, val_labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] 
loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/resMobileNetV2.ckpt\", save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/train_mobilenet_v3.py",
    "content": "import os\nimport sys\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model_v3 import mobilenet_v3_large\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 20\n    num_classes = 5\n    freeze_layer = False\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)\n\n    # create model\n    model = mobilenet_v3_large(input_shape=(im_height, im_width, 3),\n                               num_classes=num_classes,\n                               include_top=True)\n\n    # load weights\n    pre_weights_path = './weights_mobilenet_v3_large_224_1.0_float.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    if freeze_layer is True:\n        # freeze layer, only training 2 last layers\n        for layer in model.layers:\n            if layer.name not in [\"Conv_2\", \"Logits/Conv2d_1c_1x1\"]:\n                layer.trainable = False\n            else:\n                print(\"training: \" + layer.name)\n\n    model.summary()\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def 
train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            loss = loss_object(train_labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                
                                                               val_accuracy.result())\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/resMobileNetV3.ckpt\", save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/trans_v3_weights.py",
    "content": "import re\nimport tensorflow as tf\nfrom model_v3 import mobilenet_v3_large\n\n\ndef change_word(word: str):\n    word = word.replace(\"MobilenetV3/\", \"\")\n\n    if \"weights\" in word:\n        word = word.replace(\"weights\", \"kernel\")\n    elif \"Conv\" in word and \"biases\" in word:\n        word = word.replace(\"biases\", \"bias\")\n\n    return word\n\n\ndef rename_var(ckpt_path, m_info):\n    with tf.Graph().as_default(), tf.compat.v1.Session().as_default() as sess:\n        var_list = tf.train.list_variables(ckpt_path)\n        pattern = \"ExponentialMovingAverage|Momentum|global_step\"\n\n        var_dict = dict((change_word(name), [name, shape])\n                        for name, shape in var_list\n                        if len(re.findall(pattern, name)) == 0)\n\n        for k, v in m_info:\n            assert k in var_dict, \"{} not in var_dict\".format(k)\n            assert v == var_dict[k][1], \"shape {} not equal {}\".format(v, var_dict[k][1])\n\n        weights = []\n        for k, _ in m_info:\n            var = tf.train.load_variable(ckpt_path, var_dict[k][0])\n            weights.append(var)\n\n        return weights\n\n\ndef main():\n    # https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz\n    ckpt_path = './v3-large_224_1.0_float/pristine/model.ckpt-540000'\n    save_path = './pre_mobilev3.h5'\n    m = mobilenet_v3_large(input_shape=(224, 224, 3), num_classes=1001, include_top=True)\n    m_info = [(i.name.replace(\":0\", \"\"), list(i.shape))\n              for i in m.weights]\n    weights = rename_var(ckpt_path, m_info)\n    m.set_weights(weights)\n    m.save_weights(save_path)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test6_mobilenet/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                im_height: int,\n                im_width: int,\n                batch_size: int,\n                val_rate: float = 0.1):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param im_height: 输入网络图像的高度\n    :param im_width:  输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :return:\n    \"\"\"\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        # image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        image = tf.image.random_flip_left_right(image)\n       
 image = (image - 0.5) / 0.5\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        # image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        image = (image - 0.5) / 0.5\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False):\n        ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/Test7_shuffleNet/model.py",
    "content": "import tensorflow as tf\nfrom tensorflow.keras import layers, Model\n\n\nclass ConvBNReLU(layers.Layer):\n    def __init__(self,\n                 filters: int = 1,\n                 kernel_size: int = 1,\n                 strides: int = 1,\n                 padding: str = 'same',\n                 **kwargs):\n        super(ConvBNReLU, self).__init__(**kwargs)\n\n        self.conv = layers.Conv2D(filters=filters,\n                                  kernel_size=kernel_size,\n                                  strides=strides,\n                                  padding=padding,\n                                  use_bias=False,\n                                  kernel_regularizer=tf.keras.regularizers.l2(4e-5),\n                                  name=\"conv1\")\n        self.bn = layers.BatchNormalization(momentum=0.9, name=\"bn\")\n        self.relu = layers.ReLU()\n\n    def call(self, inputs, training=None, **kwargs):\n        x = self.conv(inputs)\n        x = self.bn(x, training=training)\n        x = self.relu(x)\n        return x\n\n\nclass DWConvBN(layers.Layer):\n    def __init__(self,\n                 kernel_size: int = 3,\n                 strides: int = 1,\n                 padding: str = 'same',\n                 **kwargs):\n        super(DWConvBN, self).__init__(**kwargs)\n        self.dw_conv = layers.DepthwiseConv2D(kernel_size=kernel_size,\n                                              strides=strides,\n                                              padding=padding,\n                                              use_bias=False,\n                                              kernel_regularizer=tf.keras.regularizers.l2(4e-5),\n                                              name=\"dw1\")\n        self.bn = layers.BatchNormalization(momentum=0.9, name=\"bn\")\n\n    def call(self, inputs, training=None, **kwargs):\n        x = self.dw_conv(inputs)\n        x = self.bn(x, training=training)\n        return x\n\n\nclass 
ChannelShuffle(layers.Layer):\n    def __init__(self, shape, groups: int = 2, **kwargs):\n        super(ChannelShuffle, self).__init__(**kwargs)\n        batch_size, height, width, num_channels = shape\n        assert num_channels % 2 == 0\n        channel_per_group = num_channels // groups\n\n        # Tuple of integers, does not include the samples dimension (batch size).\n        self.reshape1 = layers.Reshape((height, width, groups, channel_per_group))\n        self.reshape2 = layers.Reshape((height, width, num_channels))\n\n    def call(self, inputs, **kwargs):\n        x = self.reshape1(inputs)\n        x = tf.transpose(x, perm=[0, 1, 2, 4, 3])\n        x = self.reshape2(x)\n        return x\n\n\nclass ChannelSplit(layers.Layer):\n    def __init__(self, num_splits: int = 2, **kwargs):\n        super(ChannelSplit, self).__init__(**kwargs)\n        self.num_splits = num_splits\n\n    def call(self, inputs, **kwargs):\n        b1, b2 = tf.split(inputs,\n                          num_or_size_splits=self.num_splits,\n                          axis=-1)\n        return b1, b2\n\n\ndef shuffle_block_s1(inputs, output_c: int, stride: int, prefix: str):\n    if stride != 1:\n        raise ValueError(\"illegal stride value.\")\n\n    assert output_c % 2 == 0\n    branch_c = output_c // 2\n\n    x1, x2 = ChannelSplit(name=prefix + \"/split\")(inputs)\n\n    # main branch\n    x2 = ConvBNReLU(filters=branch_c, name=prefix + \"/b2_conv1\")(x2)\n    x2 = DWConvBN(kernel_size=3, strides=stride, name=prefix + \"/b2_dw1\")(x2)\n    x2 = ConvBNReLU(filters=branch_c, name=prefix + \"/b2_conv2\")(x2)\n\n    x = layers.Concatenate(name=prefix + \"/concat\")([x1, x2])\n    x = ChannelShuffle(x.shape, name=prefix + \"/channelshuffle\")(x)\n\n    return x\n\n\ndef shuffle_block_s2(inputs, output_c: int, stride: int, prefix: str):\n    if stride != 2:\n        raise ValueError(\"illegal stride value.\")\n\n    assert output_c % 2 == 0\n    branch_c = output_c // 2\n\n    # shortcut 
branch\n    x1 = DWConvBN(kernel_size=3, strides=stride, name=prefix + \"/b1_dw1\")(inputs)\n    x1 = ConvBNReLU(filters=branch_c, name=prefix + \"/b1_conv1\")(x1)\n\n    # main branch\n    x2 = ConvBNReLU(filters=branch_c, name=prefix + \"/b2_conv1\")(inputs)\n    x2 = DWConvBN(kernel_size=3, strides=stride, name=prefix + \"/b2_dw1\")(x2)\n    x2 = ConvBNReLU(filters=branch_c, name=prefix + \"/b2_conv2\")(x2)\n\n    x = layers.Concatenate(name=prefix + \"/concat\")([x1, x2])\n    x = ChannelShuffle(x.shape, name=prefix + \"/channelshuffle\")(x)\n\n    return x\n\n\ndef shufflenet_v2(num_classes: int,\n                  input_shape: tuple,\n                  stages_repeats: list,\n                  stages_out_channels: list):\n    img_input = layers.Input(shape=input_shape)\n    if len(stages_repeats) != 3:\n        raise ValueError(\"expected stages_repeats as list of 3 positive ints\")\n    if len(stages_out_channels) != 5:\n        raise ValueError(\"expected stages_out_channels as list of 5 positive ints\")\n\n    x = ConvBNReLU(filters=stages_out_channels[0],\n                   kernel_size=3,\n                   strides=2,\n                   name=\"conv1\")(img_input)\n\n    x = layers.MaxPooling2D(pool_size=(3, 3),\n                            strides=2,\n                            padding='same',\n                            name=\"maxpool\")(x)\n\n    stage_name = [\"stage{}\".format(i) for i in [2, 3, 4]]\n    for name, repeats, output_channels in zip(stage_name,\n                                              stages_repeats,\n                                              stages_out_channels[1:]):\n        for i in range(repeats):\n            if i == 0:\n                x = shuffle_block_s2(x, output_c=output_channels, stride=2, prefix=name + \"_{}\".format(i))\n            else:\n                x = shuffle_block_s1(x, output_c=output_channels, stride=1, prefix=name + \"_{}\".format(i))\n\n    x = ConvBNReLU(filters=stages_out_channels[-1], 
name=\"conv5\")(x)\n\n    x = layers.GlobalAveragePooling2D(name=\"globalpool\")(x)\n\n    x = layers.Dense(units=num_classes, name=\"fc\")(x)\n    x = layers.Softmax()(x)\n\n    model = Model(img_input, x, name=\"ShuffleNetV2_1.0\")\n\n    return model\n\n\ndef shufflenet_v2_x1_0(num_classes=1000, input_shape=(224, 224, 3)):\n    # 权重链接: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw  密码: mhts\n    model = shufflenet_v2(num_classes=num_classes,\n                          input_shape=input_shape,\n                          stages_repeats=[4, 8, 4],\n                          stages_out_channels=[24, 116, 232, 464, 1024])\n    return model\n\n\ndef shufflenet_v2_x0_5(num_classes=1000, input_shape=(224, 224, 3)):\n    model = shufflenet_v2(num_classes=num_classes,\n                          input_shape=input_shape,\n                          stages_repeats=[4, 8, 4],\n                          stages_out_channels=[24, 48, 96, 192, 1024])\n    return model\n\n\ndef shufflenet_v2_x2_0(num_classes=1000, input_shape=(224, 224, 3)):\n    model = shufflenet_v2(num_classes=num_classes,\n                          input_shape=input_shape,\n                          stages_repeats=[4, 8, 4],\n                          stages_out_channels=[24, 244, 488, 976, 2048])\n    return model\n"
  },
  {
    "path": "tensorflow_classification/Test7_shuffleNet/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nfrom model import shufflenet_v2_x1_0\n\n\ndef main():\n    im_height = 224\n    im_width = 224\n    num_classes = 5\n\n    mean = [0.485, 0.456, 0.406]\n    std = [0.229, 0.224, 0.225]\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to 224x224\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # scaling pixel value to (-1,1)\n    img = np.array(img).astype(np.float32)\n    img = (img / 255. - mean) / std\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = shufflenet_v2_x1_0(num_classes=num_classes)\n\n    weights_path = './save_weights/shufflenetv2.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test7_shuffleNet/train.py",
    "content": "import os\nimport sys\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model import shufflenet_v2_x1_0\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 30\n    num_classes = 5\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)\n\n    # create model\n    model = shufflenet_v2_x1_0(input_shape=(im_height, im_width, 3),\n                               num_classes=num_classes)\n\n    # load weights\n    # x1.0权重链接: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw  密码: mhts\n    pre_weights_path = './shufflenetv2_x1_0.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    model.summary()\n\n    # custom learning rate curve\n    def scheduler(now_epoch):\n        initial_lr = 0.1\n        end_lr_rate = 0.1  # end_lr = initial_lr * end_lr_rate\n        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine\n        new_lr = rate * initial_lr\n\n        # writing lr into tensorboard\n        with train_writer.as_default():\n            tf.summary.scalar('learning rate', data=new_lr, step=epoch)\n\n        return new_lr\n\n    # using keras low level api for training\n    loss_object = 
tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            loss = loss_object(train_labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # update 
learning rate\n        optimizer.learning_rate = scheduler(epoch)\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            model.save_weights(\"./save_weights/shufflenetv2.ckpt\", save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test7_shuffleNet/trans_weights.py",
    "content": "import torch\nimport numpy as np\n\nfrom model import shufflenet_v2_x1_0\n\n\ndef main():\n\n    m = shufflenet_v2_x1_0()\n    m_info = [(i.name.replace(\":0\", \"\"), list(i.shape))\n              for i in m.weights]\n\n    weights_path = './shufflenetv2_x1.pth'\n    weights_dict = torch.load(weights_path)\n    new_weights_dict = dict()\n    for key, value in weights_dict.items():\n        if \"conv1.0.weight\" == key:\n            value = np.transpose(value.detach().numpy(), (2, 3, 1, 0)).astype(np.float32)\n            new_weights_dict[\"conv1/conv1/kernel\"] = value\n        elif \"conv1.1.weight\" == key:\n            new_weights_dict[\"conv1/bn/gamma\"] = value\n        elif \"conv1.1.bias\" == key:\n            new_weights_dict[\"conv1/bn/beta\"] = value\n        elif \"conv1.1.running_mean\" == key:\n            new_weights_dict[\"conv1/bn/moving_mean\"] = value\n        elif \"conv1.1.running_var\" == key:\n            new_weights_dict[\"conv1/bn/moving_variance\"] = value\n        elif \"stage\" in key:\n            names = key.split(\".branch\")\n            num_stage, num_block = names[0].replace(\"stage\", \"\").split(\".\")\n            tf_name_prefix = \"stage{}_{}/\".format(num_stage, num_block)\n\n            torch_name2tf_name = {\"1.0.weight\": \"b1_dw1/dw1/depthwise_kernel\",\n                                  \"1.1.weight\": \"b1_dw1/bn/gamma\",\n                                  \"1.1.bias\": \"b1_dw1/bn/beta\",\n                                  \"1.1.running_mean\": \"b1_dw1/bn/moving_mean\",\n                                  \"1.1.running_var\": \"b1_dw1/bn/moving_variance\",\n                                  \"1.2.weight\": \"b1_conv1/conv1/kernel\",\n                                  \"1.3.weight\": \"b1_conv1/bn/gamma\",\n                                  \"1.3.bias\": \"b1_conv1/bn/beta\",\n                                  \"1.3.running_mean\": \"b1_conv1/bn/moving_mean\",\n                                  
\"1.3.running_var\": \"b1_conv1/bn/moving_variance\",\n                                  \"2.0.weight\": \"b2_conv1/conv1/kernel\",\n                                  \"2.1.weight\": \"b2_conv1/bn/gamma\",\n                                  \"2.1.bias\": \"b2_conv1/bn/beta\",\n                                  \"2.1.running_mean\": \"b2_conv1/bn/moving_mean\",\n                                  \"2.1.running_var\": \"b2_conv1/bn/moving_variance\",\n                                  \"2.3.weight\": \"b2_dw1/dw1/depthwise_kernel\",\n                                  \"2.4.weight\": \"b2_dw1/bn/gamma\",\n                                  \"2.4.bias\": \"b2_dw1/bn/beta\",\n                                  \"2.4.running_mean\": \"b2_dw1/bn/moving_mean\",\n                                  \"2.4.running_var\": \"b2_dw1/bn/moving_variance\",\n                                  \"2.5.weight\": \"b2_conv2/conv1/kernel\",\n                                  \"2.6.weight\": \"b2_conv2/bn/gamma\",\n                                  \"2.6.bias\": \"b2_conv2/bn/beta\",\n                                  \"2.6.running_mean\": \"b2_conv2/bn/moving_mean\",\n                                  \"2.6.running_var\": \"b2_conv2/bn/moving_variance\"}\n\n            tf_name_postfix = torch_name2tf_name[names[1]]\n            tf_name = tf_name_prefix + tf_name_postfix\n\n            if len(value.shape) > 1:  # conv or dwconv\n                if \"dw\" in tf_name:\n                    value = np.transpose(value.detach().numpy(), (2, 3, 0, 1)).astype(np.float32)\n                else:\n                    value = np.transpose(value.detach().numpy(), (2, 3, 1, 0)).astype(np.float32)\n\n            new_weights_dict[tf_name] = value\n\n        elif \"conv5.0.weight\" == key:\n            value = np.transpose(value.detach().numpy(), (2, 3, 1, 0)).astype(np.float32)\n            new_weights_dict[\"conv5/conv1/kernel\"] = value\n        elif \"conv5.1.weight\" == key:\n            
new_weights_dict[\"conv5/bn/gamma\"] = value\n        elif \"conv5.1.bias\" == key:\n            new_weights_dict[\"conv5/bn/beta\"] = value\n        elif \"conv5.1.running_mean\" == key:\n            new_weights_dict[\"conv5/bn/moving_mean\"] = value\n        elif \"conv5.1.running_var\" == key:\n            new_weights_dict[\"conv5/bn/moving_variance\"] = value\n\n        elif \"fc.weight\" == key:\n            value = np.transpose(value.detach().numpy(), (1, 0)).astype(np.float32)\n            new_weights_dict[\"fc/kernel\"] = value\n\n        elif \"fc.bias\" == key:\n            new_weights_dict[\"fc/bias\"] = value\n        else:\n            print(key)\n\n    assert len(m_info) == len(new_weights_dict)\n\n    weights_list = []\n    for name, shape in m_info:\n        assert name in new_weights_dict, \"not found key:'{}'\".format(name)\n        assert tuple(shape) == new_weights_dict[name].shape, \\\n            \"tf shape:'{}', trans shape:'{}'\".format(shape,\n                                                     new_weights_dict[name].shape)\n        weights_list.append(new_weights_dict[name])\n\n    m.set_weights(weights_list)\n    m.save_weights(\"shufflenetv2_x1_0.h5\", save_format=\"h5\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test7_shuffleNet/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                im_height: int,\n                im_width: int,\n                batch_size: int,\n                val_rate: float = 0.1):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param im_height: 输入网络图像的高度\n    :param im_width:  输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :return:\n    \"\"\"\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    mean = [0.485, 0.456, 0.406]\n    std = [0.229, 0.224, 0.225]\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        # image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, 
im_width)\n        image = tf.image.random_flip_left_right(image)\n        image = (image - mean) / std\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.image.convert_image_dtype(image, tf.float32)\n        # image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        image = (image - mean) / std\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False):\n        ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/Test9_efficientNet/model.py",
    "content": "import math\nfrom typing import Union\n\nfrom tensorflow.keras import layers, Model\n\n\nCONV_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 2.0,\n        'mode': 'fan_out',\n        'distribution': 'truncated_normal'\n    }\n}\n\nDENSE_KERNEL_INITIALIZER = {\n    'class_name': 'VarianceScaling',\n    'config': {\n        'scale': 1. / 3.,\n        'mode': 'fan_out',\n        'distribution': 'uniform'\n    }\n}\n\n\ndef correct_pad(input_size: Union[int, tuple], kernel_size: int):\n    \"\"\"Returns a tuple for zero-padding for 2D convolution with downsampling.\n\n    Arguments:\n      input_size: Input tensor size.\n      kernel_size: An integer or tuple/list of 2 integers.\n\n    Returns:\n      A tuple.\n    \"\"\"\n\n    if isinstance(input_size, int):\n        input_size = (input_size, input_size)\n\n    kernel_size = (kernel_size, kernel_size)\n\n    adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2)\n    correct = (kernel_size[0] // 2, kernel_size[1] // 2)\n    return ((correct[0] - adjust[0], correct[0]),\n            (correct[1] - adjust[1], correct[1]))\n\n\ndef block(inputs,\n          activation: str = \"swish\",\n          drop_rate: float = 0.,\n          name: str = \"\",\n          input_channel: int = 32,\n          output_channel: int = 16,\n          kernel_size: int = 3,\n          strides: int = 1,\n          expand_ratio: int = 1,\n          use_se: bool = True,\n          se_ratio: float = 0.25):\n    \"\"\"An inverted residual block.\n\n      Arguments:\n          inputs: input tensor.\n          activation: activation function.\n          drop_rate: float between 0 and 1, fraction of the input units to drop.\n          name: string, block label.\n          input_channel: integer, the number of input filters.\n          output_channel: integer, the number of output filters.\n          kernel_size: integer, the dimension of the convolution window.\n          strides: 
integer, the stride of the convolution.\n          expand_ratio: integer, scaling coefficient for the input filters.\n          use_se: whether to use se\n          se_ratio: float between 0 and 1, fraction to squeeze the input filters.\n\n      Returns:\n          output tensor for the block.\n      \"\"\"\n    # Expansion phase\n    filters = input_channel * expand_ratio\n    if expand_ratio != 1:\n        x = layers.Conv2D(filters=filters,\n                          kernel_size=1,\n                          padding=\"same\",\n                          use_bias=False,\n                          kernel_initializer=CONV_KERNEL_INITIALIZER,\n                          name=name + \"expand_conv\")(inputs)\n        x = layers.BatchNormalization(name=name + \"expand_bn\")(x)\n        x = layers.Activation(activation, name=name + \"expand_activation\")(x)\n    else:\n        x = inputs\n\n    # Depthwise Convolution\n    if strides == 2:\n        x = layers.ZeroPadding2D(padding=correct_pad(filters, kernel_size),\n                                 name=name + \"dwconv_pad\")(x)\n\n    x = layers.DepthwiseConv2D(kernel_size=kernel_size,\n                               strides=strides,\n                               padding=\"same\" if strides == 1 else \"valid\",\n                               use_bias=False,\n                               depthwise_initializer=CONV_KERNEL_INITIALIZER,\n                               name=name + \"dwconv\")(x)\n    x = layers.BatchNormalization(name=name + \"bn\")(x)\n    x = layers.Activation(activation, name=name + \"activation\")(x)\n\n    if use_se:\n        filters_se = int(input_channel * se_ratio)\n        se = layers.GlobalAveragePooling2D(name=name + \"se_squeeze\")(x)\n        se = layers.Reshape((1, 1, filters), name=name + \"se_reshape\")(se)\n        se = layers.Conv2D(filters=filters_se,\n                           kernel_size=1,\n                           padding=\"same\",\n                           
activation=activation,\n                           kernel_initializer=CONV_KERNEL_INITIALIZER,\n                           name=name + \"se_reduce\")(se)\n        se = layers.Conv2D(filters=filters,\n                           kernel_size=1,\n                           padding=\"same\",\n                           activation=\"sigmoid\",\n                           kernel_initializer=CONV_KERNEL_INITIALIZER,\n                           name=name + \"se_expand\")(se)\n        x = layers.multiply([x, se], name=name + \"se_excite\")\n\n    # Output phase\n    x = layers.Conv2D(filters=output_channel,\n                      kernel_size=1,\n                      padding=\"same\",\n                      use_bias=False,\n                      kernel_initializer=CONV_KERNEL_INITIALIZER,\n                      name=name + \"project_conv\")(x)\n    x = layers.BatchNormalization(name=name + \"project_bn\")(x)\n    if strides == 1 and input_channel == output_channel:\n        if drop_rate > 0:\n            x = layers.Dropout(rate=drop_rate,\n                               noise_shape=(None, 1, 1, 1),  # binary dropout mask\n                               name=name + \"drop\")(x)\n        x = layers.add([x, inputs], name=name + \"add\")\n\n    return x\n\n\ndef efficient_net(width_coefficient,\n                  depth_coefficient,\n                  input_shape=(224, 224, 3),\n                  dropout_rate=0.2,\n                  drop_connect_rate=0.2,\n                  activation=\"swish\",\n                  model_name=\"efficientnet\",\n                  include_top=True,\n                  num_classes=1000):\n    \"\"\"Instantiates the EfficientNet architecture using given scaling coefficients.\n\n      Reference:\n      - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](\n          https://arxiv.org/abs/1905.11946) (ICML 2019)\n\n      Optionally loads weights pre-trained on ImageNet.\n      Note that the data format convention used by the model 
is\n      the one specified in your Keras config at `~/.keras/keras.json`.\n\n      Arguments:\n        width_coefficient: float, scaling coefficient for network width.\n        depth_coefficient: float, scaling coefficient for network depth.\n        input_shape: tuple, default input image shape(not including the batch size).\n        dropout_rate: float, dropout rate before final classifier layer.\n        drop_connect_rate: float, dropout rate at skip connections.\n        activation: activation function.\n        model_name: string, model name.\n        include_top: whether to include the fully-connected\n            layer at the top of the network.\n        num_classes: optional number of classes to classify images\n            into, only to be specified if `include_top` is True, and\n            if no `weights` argument is specified.\n\n      Returns:\n        A `keras.Model` instance.\n    \"\"\"\n\n    # kernel_size, repeats, in_channel, out_channel, exp_ratio, strides, SE\n    block_args = [[3, 1, 32, 16, 1, 1, True],\n                  [3, 2, 16, 24, 6, 2, True],\n                  [5, 2, 24, 40, 6, 2, True],\n                  [3, 3, 40, 80, 6, 2, True],\n                  [5, 3, 80, 112, 6, 1, True],\n                  [5, 4, 112, 192, 6, 2, True],\n                  [3, 1, 192, 320, 6, 1, True]]\n\n    def round_filters(filters, divisor=8):\n        \"\"\"Round number of filters based on depth multiplier.\"\"\"\n        filters *= width_coefficient\n        new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)\n        # Make sure that round down does not go down by more than 10%.\n        if new_filters < 0.9 * filters:\n            new_filters += divisor\n        return int(new_filters)\n\n    def round_repeats(repeats):\n        \"\"\"Round number of repeats based on depth multiplier.\"\"\"\n        return int(math.ceil(depth_coefficient * repeats))\n\n    img_input = layers.Input(shape=input_shape)\n\n    # data 
preprocessing\n    x = layers.experimental.preprocessing.Rescaling(1. / 255.)(img_input)\n    x = layers.experimental.preprocessing.Normalization()(x)\n\n    # first conv2d\n    x = layers.ZeroPadding2D(padding=correct_pad(input_shape[:2], 3),\n                             name=\"stem_conv_pad\")(x)\n    x = layers.Conv2D(filters=round_filters(32),\n                      kernel_size=3,\n                      strides=2,\n                      padding=\"valid\",\n                      use_bias=False,\n                      kernel_initializer=CONV_KERNEL_INITIALIZER,\n                      name=\"stem_conv\")(x)\n    x = layers.BatchNormalization(name=\"stem_bn\")(x)\n    x = layers.Activation(activation, name=\"stem_activation\")(x)\n\n    # build blocks\n    b = 0\n    num_blocks = float(sum(round_repeats(i[1]) for i in block_args))\n    for i, args in enumerate(block_args):\n        assert args[1] > 0\n        # Update block input and output filters based on depth multiplier.\n        args[2] = round_filters(args[2])  # input_channel\n        args[3] = round_filters(args[3])  # output_channel\n\n        for j in range(round_repeats(args[1])):\n            x = block(x,\n                      activation=activation,\n                      drop_rate=drop_connect_rate * b / num_blocks,\n                      name=\"block{}{}_\".format(i + 1, chr(j + 97)),\n                      kernel_size=args[0],\n                      input_channel=args[2] if j == 0 else args[3],\n                      output_channel=args[3],\n                      expand_ratio=args[4],\n                      strides=args[5] if j == 0 else 1,\n                      use_se=args[6])\n            b += 1\n\n    # build top\n    x = layers.Conv2D(round_filters(1280),\n                      kernel_size=1,\n                      padding=\"same\",\n                      use_bias=False,\n                      kernel_initializer=CONV_KERNEL_INITIALIZER,\n                      name=\"top_conv\")(x)\n    x = 
layers.BatchNormalization(name=\"top_bn\")(x)\n    x = layers.Activation(activation, name=\"top_activation\")(x)\n    if include_top:\n        x = layers.GlobalAveragePooling2D(name=\"avg_pool\")(x)\n        if dropout_rate > 0:\n            x = layers.Dropout(dropout_rate, name=\"top_dropout\")(x)\n        x = layers.Dense(units=num_classes,\n                         activation=\"softmax\",\n                         kernel_initializer=DENSE_KERNEL_INITIALIZER,\n                         name=\"predictions\")(x)\n\n    model = Model(img_input, x, name=model_name)\n\n    return model\n\n\ndef efficientnet_b0(num_classes=1000,\n                    include_top=True,\n                    input_shape=(224, 224, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb0.h5\n    return efficient_net(width_coefficient=1.0,\n                         depth_coefficient=1.0,\n                         input_shape=input_shape,\n                         dropout_rate=0.2,\n                         model_name=\"efficientnetb0\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b1(num_classes=1000,\n                    include_top=True,\n                    input_shape=(240, 240, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb1.h5\n    return efficient_net(width_coefficient=1.0,\n                         depth_coefficient=1.1,\n                         input_shape=input_shape,\n                         dropout_rate=0.2,\n                         model_name=\"efficientnetb1\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b2(num_classes=1000,\n                    include_top=True,\n                    input_shape=(260, 260, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb2.h5\n    return efficient_net(width_coefficient=1.1,\n                         
depth_coefficient=1.2,\n                         input_shape=input_shape,\n                         dropout_rate=0.3,\n                         model_name=\"efficientnetb2\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b3(num_classes=1000,\n                    include_top=True,\n                    input_shape=(300, 300, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb3.h5\n    return efficient_net(width_coefficient=1.2,\n                         depth_coefficient=1.4,\n                         input_shape=input_shape,\n                         dropout_rate=0.3,\n                         model_name=\"efficientnetb3\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b4(num_classes=1000,\n                    include_top=True,\n                    input_shape=(380, 380, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb4.h5\n    return efficient_net(width_coefficient=1.4,\n                         depth_coefficient=1.8,\n                         input_shape=input_shape,\n                         dropout_rate=0.4,\n                         model_name=\"efficientnetb4\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b5(num_classes=1000,\n                    include_top=True,\n                    input_shape=(456, 456, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb5.h5\n    return efficient_net(width_coefficient=1.6,\n                         depth_coefficient=2.2,\n                         input_shape=input_shape,\n                         dropout_rate=0.4,\n                         model_name=\"efficientnetb5\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b6(num_classes=1000,\n     
               include_top=True,\n                    input_shape=(528, 528, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb6.h5\n    return efficient_net(width_coefficient=1.8,\n                         depth_coefficient=2.6,\n                         input_shape=input_shape,\n                         dropout_rate=0.5,\n                         model_name=\"efficientnetb6\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n\n\ndef efficientnet_b7(num_classes=1000,\n                    include_top=True,\n                    input_shape=(600, 600, 3)):\n    # https://storage.googleapis.com/keras-applications/efficientnetb7.h5\n    return efficient_net(width_coefficient=2.0,\n                         depth_coefficient=3.1,\n                         input_shape=input_shape,\n                         dropout_rate=0.5,\n                         model_name=\"efficientnetb7\",\n                         include_top=include_top,\n                         num_classes=num_classes)\n"
  },
  {
    "path": "tensorflow_classification/Test9_efficientNet/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport matplotlib.pyplot as plt\n\nfrom model import efficientnet_b0 as create_model\n\n\ndef main():\n    num_classes = 5\n\n    img_size = {\"B0\": 224,\n                \"B1\": 240,\n                \"B2\": 260,\n                \"B3\": 300,\n                \"B4\": 380,\n                \"B5\": 456,\n                \"B6\": 528,\n                \"B7\": 600}\n    num_model = \"B0\"\n    im_height = im_width = img_size[num_model]\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' does not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image to the input size of the selected model\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # read image\n    img = np.array(img).astype(np.float32)\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' does not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n\n    weights_path = './save_weights/efficientnet.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img))\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test9_efficientNet/train.py",
    "content": "import os\nimport sys\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model import efficientnet_b0 as create_model\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    img_size = {\"B0\": 224,\n                \"B1\": 240,\n                \"B2\": 260,\n                \"B3\": 300,\n                \"B4\": 380,\n                \"B5\": 456,\n                \"B6\": 528,\n                \"B7\": 600}\n\n    num_model = \"B0\"\n    im_height = im_width = img_size[num_model]\n    batch_size = 16\n    epochs = 30\n    num_classes = 5\n    freeze_layers = True\n    initial_lr = 0.01\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n\n    # load weights\n    pre_weights_path = './efficientnetb0.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    # freeze bottom layers\n    if freeze_layers:\n        unfreeze_layers = [\"top_conv\", \"top_bn\", \"predictions\"]\n        for layer in model.layers:\n            if layer.name not in unfreeze_layers:\n                layer.trainable = False\n            else:\n                print(\"training {}\".format(layer.name))\n\n    model.summary()\n\n    # custom learning rate curve\n    def scheduler(now_epoch):\n        
end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate\n        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # cosine\n        new_lr = rate * initial_lr\n\n        # writing lr into tensorboard\n        with train_writer.as_default():\n            tf.summary.scalar('learning rate', data=new_lr, step=epoch)\n\n        return new_lr\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            loss = loss_object(train_labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, 
labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # update learning rate\n        optimizer.learning_rate = scheduler(epoch)\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            save_name = \"./save_weights/efficientnet.ckpt\"\n            model.save_weights(save_name, save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/Test9_efficientNet/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                im_height: int,\n                im_width: int,\n                batch_size: int,\n                val_rate: float = 0.1):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param im_height: 输入网络图像的高度\n    :param im_width:  输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :return:\n    \"\"\"\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        image = tf.image.random_flip_left_right(image)\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = 
tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False):\n        ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/analyze_weights_featuremap/alexnet_model.py",
    "content": "from tensorflow.keras import layers, models, Model, Sequential\n\n\ndef AlexNet_v1(im_height=224, im_width=224, class_num=1000):\n    # tensorflow中的tensor通道排序是NHWC\n    input_image = layers.Input(shape=(im_height, im_width, 3), dtype=\"float32\")  # output(None, 224, 224, 3)\n    x = layers.ZeroPadding2D(((1, 2), (1, 2)))(input_image)                      # output(None, 227, 227, 3)\n    x = layers.Conv2D(48, kernel_size=11, strides=4, activation=\"relu\")(x)       # output(None, 55, 55, 48)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 27, 27, 48)\n    x = layers.Conv2D(128, kernel_size=5, padding=\"same\", activation=\"relu\")(x)  # output(None, 27, 27, 128)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 13, 13, 128)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 192)\n    x = layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 192)\n    x = layers.Conv2D(128, kernel_size=3, padding=\"same\", activation=\"relu\")(x)  # output(None, 13, 13, 128)\n    x = layers.MaxPool2D(pool_size=3, strides=2)(x)                              # output(None, 6, 6, 128)\n\n    x = layers.Flatten()(x)                         # output(None, 6*6*128)\n    x = layers.Dropout(0.2)(x)\n    x = layers.Dense(2048, activation=\"relu\")(x)    # output(None, 2048)\n    x = layers.Dropout(0.2)(x)\n    x = layers.Dense(2048, activation=\"relu\")(x)    # output(None, 2048)\n    x = layers.Dense(class_num)(x)                  # output(None, 5)\n    predict = layers.Softmax()(x)\n\n    model = models.Model(inputs=input_image, outputs=predict)\n    return model\n\n\nclass AlexNet_v2(Model):\n    def __init__(self, class_num=1000):\n        super(AlexNet_v2, self).__init__()\n        self.features = Sequential([\n            layers.ZeroPadding2D(((1, 2), (1, 2))),               
                  # output(None, 227, 227, 3)\n            layers.Conv2D(48, kernel_size=11, strides=4, activation=\"relu\"),        # output(None, 55, 55, 48)\n            layers.MaxPool2D(pool_size=3, strides=2),                               # output(None, 27, 27, 48)\n            layers.Conv2D(128, kernel_size=5, padding=\"same\", activation=\"relu\"),   # output(None, 27, 27, 128)\n            layers.MaxPool2D(pool_size=3, strides=2),                               # output(None, 13, 13, 128)\n            layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 192)\n            layers.Conv2D(192, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 192)\n            layers.Conv2D(128, kernel_size=3, padding=\"same\", activation=\"relu\"),   # output(None, 13, 13, 128)\n            layers.MaxPool2D(pool_size=3, strides=2)])                              # output(None, 6, 6, 128)\n\n        self.flatten = layers.Flatten()\n        self.classifier = Sequential([\n            layers.Dropout(0.2),\n            layers.Dense(1024, activation=\"relu\"),                                  # output(None, 2048)\n            layers.Dropout(0.2),\n            layers.Dense(128, activation=\"relu\"),                                   # output(None, 2048)\n            layers.Dense(class_num),                                                # output(None, 5)\n            layers.Softmax()\n        ])\n\n    def call(self, inputs, **kwargs):\n        x = self.features(inputs)\n        x = self.flatten(x)\n        x = self.classifier(x)\n        return x\n\n    def receive_feature_map(self, x, layers_name):\n        outputs = []\n        for module in self.features.layers:\n            x = module(x)\n            if module.name in layers_name:\n                outputs.append(x)\n        return outputs\n"
  },
  {
    "path": "tensorflow_classification/analyze_weights_featuremap/analyze_feature_map.py",
    "content": "from alexnet_model import AlexNet_v1, AlexNet_v2\nfrom PIL import Image\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom tensorflow.keras import Model, Input\n\nim_height = 224\nim_width = 224\n\n# load image\nimg = Image.open(\"../tulip.jpg\")\n# resize image to 224x224\nimg = img.resize((im_width, im_height))\n\n# scaling pixel value to (0-1)\nimg = np.array(img) / 255.\n\n# Add the image to a batch where it's the only member.\nimg = (np.expand_dims(img, 0))\n\n\nmodel = AlexNet_v1(class_num=5)  # functional api\n# model = AlexNet_v2(class_num=5)  # subclass api\n# model.build((None, 224, 224, 3))\n# If `by_name` is False weights are loaded based on the network's topology.\nmodel.load_weights(\"./myAlex.h5\")\n# model.load_weights(\"./submodel.h5\")\n# for layer in model.layers:\n#     print(layer.name)\nmodel.summary()\nlayers_name = [\"conv2d\", \"conv2d_1\"]\n\n# functional API\ntry:\n    input_node = model.input\n    output_node = [model.get_layer(name=layer_name).output for layer_name in layers_name]\n    model1 = Model(inputs=input_node, outputs=output_node)\n    outputs = model1.predict(img)\n    for index, feature_map in enumerate(outputs):\n        # [N, H, W, C] -> [H, W, C]\n        im = np.squeeze(feature_map)\n\n        # show top 12 feature maps\n        plt.figure()\n        for i in range(12):\n            ax = plt.subplot(3, 4, i + 1)\n            # [H, W, C]\n            plt.imshow(im[:, :, i], cmap='gray')\n        plt.suptitle(layers_name[index])\n        plt.show()\nexcept Exception as e:\n    print(e)\n\n# subclasses API\n# outputs = model.receive_feature_map(img, layers_name)\n# for index, feature_maps in enumerate(outputs):\n#     # [N, H, W, C] -> [H, W, C]\n#     im = np.squeeze(feature_maps)\n#\n#     # show top 12 feature maps\n#     plt.figure()\n#     for i in range(12):\n#         ax = plt.subplot(3, 4, i + 1)\n#         # [H, W, C]\n#         plt.imshow(im[:, :, i], cmap='gray')\n#     
plt.suptitle(layers_name[index])\n#     plt.show()\n"
  },
  {
    "path": "tensorflow_classification/analyze_weights_featuremap/analyze_kernel_weight.py",
    "content": "from alexnet_model import AlexNet_v1, AlexNet_v2\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nmodel = AlexNet_v1(class_num=5)  # functional api\n# model = AlexNet_v2(class_num=5)  # subclass api\n# model.build((None, 224, 224, 3))\nmodel.load_weights(\"./myAlex.h5\")\n# model.load_weights(\"./submodel.h5\")\nmodel.summary()\nfor layer in model.layers:\n    for index, weight in enumerate(layer.weights):\n        # [kernel_height, kernel_width, kernel_channel, kernel_number]\n        weight_t = weight.numpy()\n        # read a kernel information\n        # k = weight_t[:, :, :, 0]\n\n        # calculate mean, std, min, max\n        weight_mean = weight_t.mean()\n        weight_std = weight_t.std(ddof=1)\n        weight_min = weight_t.min()\n        weight_max = weight_t.max()\n        print(\"mean is {}, std is {}, min is {}, max is {}\".format(weight_mean,\n                                                                   weight_std,\n                                                                   weight_min,\n                                                                   weight_max))\n\n        # plot hist image\n        plt.close()\n        weight_vec = np.reshape(weight_t, [-1])\n        plt.hist(weight_vec, bins=50)\n        plt.title(weight.name)\n        plt.show()"
  },
  {
    "path": "tensorflow_classification/custom_dataset/train_fit.py",
    "content": "import os\nimport math\nimport datetime\n\nimport tensorflow as tf\n\nfrom utils import generate_ds\n\n\ndef main():\n    data_root = \"/home/wz/my_project/my_github/data_set/flower_data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    num_classes = 5\n    im_height = 224\n    im_width = 224\n    batch_size = 8\n    epochs = 20\n    log_dir = \"logs/fit/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n\n    train_ds, val_ds = generate_ds(data_root, im_height, im_width, batch_size)\n\n    # create base model\n    base_model = tf.keras.applications.ResNet50(include_top=False,\n                                                input_shape=(224, 224, 3),\n                                                weights='imagenet')\n    # freeze base model\n    base_model.trainable = False\n    base_model.summary()\n\n    # create new model on top\n    inputs = tf.keras.Input(shape=(224, 224, 3))\n    x = tf.keras.applications.resnet50.preprocess_input(inputs)\n    x = base_model(x, training=False)\n    x = tf.keras.layers.GlobalAveragePooling2D()(x)\n    outputs = tf.keras.layers.Dense(num_classes)(x)\n    model = tf.keras.Model(inputs, outputs)\n    model.summary()\n\n    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),\n                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n                  metrics=['accuracy'])\n\n    def scheduler(epoch):\n        \"\"\"\n        自定义学习率变化\n        :param epoch: 当前训练epoch\n        :return:\n        \"\"\"\n        initial_lr = 0.01\n        end_lr = 0.001\n        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine\n        new_lr = rate * initial_lr\n\n        return new_lr\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/model_{epoch}.h5',\n                                                    
save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor='val_accuracy'),\n                 tf.keras.callbacks.TensorBoard(log_dir=log_dir,\n                                                write_graph=True,\n                                                histogram_freq=1),\n                 tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)]\n\n    model.fit(x=train_ds,\n              epochs=epochs,\n              validation_data=val_ds,\n              callbacks=callbacks)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/custom_dataset/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                im_height: int,\n                im_width: int,\n                batch_size: int,\n                val_rate: float = 0.1):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param im_height: 输入网络图像的高度\n    :param im_width:  输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :return:\n    \"\"\"\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        # image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        image = tf.image.random_flip_left_right(image)\n       
 return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        # image = tf.image.convert_image_dtype(image, tf.float32)\n        image = tf.cast(image, tf.float32)\n        # image = tf.image.resize(image, [im_height, im_width])\n        image = tf.image.resize_with_crop_or_pad(image, im_height, im_width)\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False):\n        ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/swin_transformer/model.py",
    "content": "import tensorflow as tf\nfrom tensorflow.keras import Model, layers, initializers\nimport numpy as np\n\n\nclass PatchEmbed(layers.Layer):\n    \"\"\"\n    2D Image to Patch Embedding\n    \"\"\"\n    def __init__(self, patch_size=4, embed_dim=96, norm_layer=None):\n        super(PatchEmbed, self).__init__()\n        self.embed_dim = embed_dim\n        self.patch_size = (patch_size, patch_size)\n        self.norm = norm_layer(epsilon=1e-6, name=\"norm\") if norm_layer else layers.Activation('linear')\n\n        self.proj = layers.Conv2D(filters=embed_dim, kernel_size=patch_size,\n                                  strides=patch_size, padding='SAME',\n                                  kernel_initializer=initializers.LecunNormal(),\n                                  bias_initializer=initializers.Zeros(),\n                                  name=\"proj\")\n\n    def call(self, x, **kwargs):\n        _, H, W, _ = x.shape\n\n        # padding\n        # 如果输入图片的H，W不是patch_size的整数倍，需要进行padding\n        pad_input = (H % self.patch_size[0] != 0) or (W % self.patch_size[1] != 0)\n        if pad_input:\n            paddings = tf.constant([[0, 0],\n                                    [0, self.patch_size[0] - H % self.patch_size[0]],\n                                    [0, self.patch_size[1] - W % self.patch_size[1]]])\n            x = tf.pad(x, paddings)\n\n        # 下采样patch_size倍\n        x = self.proj(x)\n        B, H, W, C = x.shape\n        # [B, H, W, C] -> [B, H*W, C]\n        x = tf.reshape(x, [B, -1, C])\n        x = self.norm(x)\n        return x, H, W\n\n\ndef window_partition(x, window_size: int):\n    \"\"\"\n        将feature map按照window_size划分成一个个没有重叠的window\n        Args:\n            x: (B, H, W, C)\n            window_size (int): window size(M)\n\n        Returns:\n            windows: (num_windows*B, window_size, window_size, C)\n        \"\"\"\n    B, H, W, C = x.shape\n    x = tf.reshape(x, [B, H // window_size, window_size, W // window_size, 
window_size, C])\n    # transpose: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H//Mh, W//Mh, Mw, Mw, C]\n    # reshape: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B*num_windows, Mh, Mw, C]\n    x = tf.transpose(x, [0, 1, 3, 2, 4, 5])\n    windows = tf.reshape(x, [-1, window_size, window_size, C])\n    return windows\n\n\ndef window_reverse(windows, window_size: int, H: int, W: int):\n    \"\"\"\n    将一个个window还原成一个feature map\n    Args:\n        windows: (num_windows*B, window_size, window_size, C)\n        window_size (int): Window size(M)\n        H (int): Height of image\n        W (int): Width of image\n\n    Returns:\n        x: (B, H, W, C)\n    \"\"\"\n    B = int(windows.shape[0] / (H * W / window_size / window_size))\n    # reshape: [B*num_windows, Mh, Mw, C] -> [B, H//Mh, W//Mw, Mh, Mw, C]\n    x = tf.reshape(windows, [B, H // window_size, W // window_size, window_size, window_size, -1])\n    # permute: [B, H//Mh, W//Mw, Mh, Mw, C] -> [B, H//Mh, Mh, W//Mw, Mw, C]\n    # reshape: [B, H//Mh, Mh, W//Mw, Mw, C] -> [B, H, W, C]\n    x = tf.transpose(x, [0, 1, 3, 2, 4, 5])\n    x = tf.reshape(x, [B, H, W, -1])\n    return x\n\n\nclass PatchMerging(layers.Layer):\n    def __init__(self, dim: int, norm_layer=layers.LayerNormalization, name=None):\n        super(PatchMerging, self).__init__(name=name)\n        self.dim = dim\n        self.reduction = layers.Dense(2*dim,\n                                      use_bias=False,\n                                      kernel_initializer=initializers.TruncatedNormal(stddev=0.02),\n                                      name=\"reduction\")\n        self.norm = norm_layer(epsilon=1e-6, name=\"norm\")\n\n    def call(self, x, H, W):\n        \"\"\"\n        x: [B, H*W, C]\n        \"\"\"\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        x = tf.reshape(x, [B, H, W, C])\n        # padding\n        # 如果输入feature map的H，W不是2的整数倍，需要进行padding\n        pad_input = (H % 2 != 0) or (W % 2 != 0)\n        
if pad_input:\n            paddings = tf.constant([[0, 0],\n                                    [0, 1],\n                                    [0, 1],\n                                    [0, 0]])\n            x = tf.pad(x, paddings)\n\n        x0 = x[:, 0::2, 0::2, :]  # [B, H/2, W/2, C]\n        x1 = x[:, 1::2, 0::2, :]  # [B, H/2, W/2, C]\n        x2 = x[:, 0::2, 1::2, :]  # [B, H/2, W/2, C]\n        x3 = x[:, 1::2, 1::2, :]  # [B, H/2, W/2, C]\n        x = tf.concat([x0, x1, x2, x3], -1)  # [B, H/2, W/2, 4*C]\n        x = tf.reshape(x, [B, -1, 4*C])  # [B, H/2*W/2, 4*C]\n\n        x = self.norm(x)\n        x = self.reduction(x)  # [B, H/2*W/2, 2*C]\n\n        return x\n\n\nclass MLP(layers.Layer):\n    \"\"\"\n    MLP as used in Vision Transformer, MLP-Mixer and related networks\n    \"\"\"\n\n    k_ini = initializers.TruncatedNormal(stddev=0.02)\n    b_ini = initializers.Zeros()\n\n    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):\n        super(MLP, self).__init__(name=name)\n        self.fc1 = layers.Dense(int(in_features * mlp_ratio), name=\"fc1\",\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\n        self.act = layers.Activation(\"gelu\")\n        self.fc2 = layers.Dense(in_features, name=\"fc2\",\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\n        self.drop = layers.Dropout(drop)\n\n    def call(self, x, training=None):\n        x = self.fc1(x)\n        x = self.act(x)\n        x = self.drop(x, training=training)\n        x = self.fc2(x)\n        x = self.drop(x, training=training)\n        return x\n\n\nclass WindowAttention(layers.Layer):\n    r\"\"\" Window based multi-head self attention (W-MSA) module with relative position bias.\n    It supports both of shifted and non-shifted window.\n\n    Args:\n        dim (int): Number of input channels.\n        window_size (tuple[int]): The height and width of the window.\n        num_heads 
(int): Number of attention heads.\n        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True\n        attn_drop_ratio (float, optional): Dropout ratio of attention weight. Default: 0.0\n        proj_drop_ratio (float, optional): Dropout ratio of output. Default: 0.0\n    \"\"\"\n\n    k_ini = initializers.GlorotUniform()\n    b_ini = initializers.Zeros()\n\n    def __init__(self,\n                 dim,\n                 window_size,\n                 num_heads=8,\n                 qkv_bias=False,\n                 attn_drop_ratio=0.,\n                 proj_drop_ratio=0.,\n                 name=None):\n        super(WindowAttention, self).__init__(name=name)\n        self.dim = dim\n        self.window_size = window_size  # [Mh, Mw]\n        self.num_heads = num_heads\n        head_dim = dim // num_heads\n        self.scale = head_dim ** -0.5\n\n        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias, name=\"qkv\",\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\n        self.attn_drop = layers.Dropout(attn_drop_ratio)\n        self.proj = layers.Dense(dim, name=\"proj\",\n                                 kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\n        self.proj_drop = layers.Dropout(proj_drop_ratio)\n\n    def build(self, input_shape):\n        # define a parameter table of relative position bias\n        # [2*Mh-1 * 2*Mw-1, nH]\n        self.relative_position_bias_table = self.add_weight(\n            shape=[(2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1), self.num_heads],\n            initializer=initializers.TruncatedNormal(stddev=0.02),\n            trainable=True,\n            dtype=tf.float32,\n            name=\"relative_position_bias_table\"\n        )\n\n        coords_h = np.arange(self.window_size[0])\n        coords_w = np.arange(self.window_size[1])\n        coords = np.stack(np.meshgrid(coords_h, coords_w, indexing=\"ij\"))  # 
[2, Mh, Mw]\n        coords_flatten = np.reshape(coords, [2, -1])  # [2, Mh*Mw]\n        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw]\n        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # [2, Mh*Mw, Mh*Mw]\n        relative_coords = np.transpose(relative_coords, [1, 2, 0])   # [Mh*Mw, Mh*Mw, 2]\n        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0\n        relative_coords[:, :, 1] += self.window_size[1] - 1\n        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1\n        relative_position_index = relative_coords.sum(-1)  # [Mh*Mw, Mh*Mw]\n\n        self.relative_position_index = tf.Variable(tf.convert_to_tensor(relative_position_index),\n                                                   trainable=False,\n                                                   dtype=tf.int64,\n                                                   name=\"relative_position_index\")\n\n    def call(self, x, mask=None, training=None):\n        \"\"\"\n        Args:\n            x: input features with shape of (num_windows*B, Mh*Mw, C)\n            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None\n            training: whether training mode\n        \"\"\"\n        # [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        B_, N, C = x.shape\n\n        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]\n        qkv = self.qkv(x)\n        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]\n        qkv = tf.reshape(qkv, [B_, N, 3, self.num_heads, C // self.num_heads])\n        # transpose: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])\n        # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        q, k, v = qkv[0], qkv[1], qkv[2]\n\n        # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw]\n        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, 
Mh*Mw]\n        attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale\n\n        # relative_position_bias(reshape): [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH]\n        relative_position_bias = tf.gather(self.relative_position_bias_table,\n                                           tf.reshape(self.relative_position_index, [-1]))\n        relative_position_bias = tf.reshape(relative_position_bias,\n                                            [self.window_size[0] * self.window_size[1],\n                                             self.window_size[0] * self.window_size[1],\n                                             -1])\n        relative_position_bias = tf.transpose(relative_position_bias, [2, 0, 1])  # [nH, Mh*Mw, Mh*Mw]\n        attn = attn + tf.expand_dims(relative_position_bias, 0)\n\n        if mask is not None:\n            # mask: [nW, Mh*Mw, Mh*Mw]\n            nW = mask.shape[0]  # num_windows\n            # attn(reshape): [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]\n            # mask(expand_dim): [1, nW, 1, Mh*Mw, Mh*Mw]\n            attn = tf.reshape(attn, [B_ // nW, nW, self.num_heads, N, N]) + tf.expand_dims(tf.expand_dims(mask, 1), 0)\n            attn = tf.reshape(attn, [-1, self.num_heads, N, N])\n\n        attn = tf.nn.softmax(attn, axis=-1)\n        attn = self.attn_drop(attn, training=training)\n\n        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]\n        x = tf.matmul(attn, v)\n        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]\n        x = tf.transpose(x, [0, 2, 1, 3])\n        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]\n        x = tf.reshape(x, [B_, N, C])\n\n        x = self.proj(x)\n        x = self.proj_drop(x, training=training)\n        return x\n\n\nclass SwinTransformerBlock(layers.Layer):\n    r\"\"\" Swin Transformer Block.\n\n    Args:\n        dim (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n    
    window_size (int): Window size.\n        shift_size (int): Shift size for SW-MSA.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float, optional): Stochastic depth rate. Default: 0.0\n    \"\"\"\n\n    def __init__(self, dim, num_heads, window_size=7, shift_size=0,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., name=None):\n        super().__init__(name=name)\n        self.dim = dim\n        self.num_heads = num_heads\n        self.window_size = window_size\n        self.shift_size = shift_size\n        self.mlp_ratio = mlp_ratio\n        assert 0 <= self.shift_size < self.window_size, \"shift_size must in 0-window_size\"\n\n        self.norm1 = layers.LayerNormalization(epsilon=1e-6, name=\"norm1\")\n        self.attn = WindowAttention(dim,\n                                    window_size=(window_size, window_size),\n                                    num_heads=num_heads,\n                                    qkv_bias=qkv_bias,\n                                    attn_drop_ratio=attn_drop,\n                                    proj_drop_ratio=drop,\n                                    name=\"attn\")\n        self.drop_path = layers.Dropout(rate=drop_path, noise_shape=(None, 1, 1)) if drop_path > 0. 
\\\n            else layers.Activation(\"linear\")\n        self.norm2 = layers.LayerNormalization(epsilon=1e-6, name=\"norm2\")\n        self.mlp = MLP(dim, drop=drop, name=\"mlp\")\n\n    def call(self, x, attn_mask, training=None):\n        H, W = self.H, self.W\n        B, L, C = x.shape\n        assert L == H * W, \"input feature has wrong size\"\n\n        shortcut = x\n        x = self.norm1(x)\n        x = tf.reshape(x, [B, H, W, C])\n\n        # pad feature maps to multiples of window size\n        # 把feature map给pad到window size的整数倍\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\n        if pad_r > 0 or pad_b > 0:\n            paddings = tf.constant([[0, 0],\n                                    [0, pad_r],\n                                    [0, pad_b],\n                                    [0, 0]])\n            x = tf.pad(x, paddings)\n\n        _, Hp, Wp, _ = x.shape\n\n        # cyclic shift\n        if self.shift_size > 0:\n            shifted_x = tf.roll(x, shift=(-self.shift_size, -self.shift_size), axis=(1, 2))\n        else:\n            shifted_x = x\n            attn_mask = None\n\n        # partition windows\n        x_windows = window_partition(shifted_x, self.window_size)  # [nW*B, Mh, Mw, C]\n        x_windows = tf.reshape(x_windows, [-1, self.window_size * self.window_size, C])  # [nW*B, Mh*Mw, C]\n\n        # W-MSA/SW-MSA\n        attn_windows = self.attn(x_windows, mask=attn_mask, training=training)  # [nW*B, Mh*Mw, C]\n\n        # merge windows\n        attn_windows = tf.reshape(attn_windows,\n                                  [-1, self.window_size, self.window_size, C])  # [nW*B, Mh, Mw, C]\n        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # [B, H', W', C]\n\n        # reverse cyclic shift\n        if self.shift_size > 0:\n            x = tf.roll(shifted_x, shift=(self.shift_size, self.shift_size), 
axis=(1, 2))\n        else:\n            x = shifted_x\n\n        if pad_r > 0 or pad_b > 0:\n            # 把前面pad的数据移除掉\n            x = tf.slice(x, begin=[0, 0, 0, 0], size=[B, H, W, C])\n\n        x = tf.reshape(x, [B, H * W, C])\n\n        # FFN\n        x = shortcut + self.drop_path(x, training=training)\n        x = x + self.drop_path(self.mlp(self.norm2(x)), training=training)\n\n        return x\n\n\nclass BasicLayer(layers.Layer):\n    \"\"\"\n    A basic Swin Transformer layer for one stage.\n\n    Args:\n        dim (int): Number of input channels.\n        depth (int): Number of blocks.\n        num_heads (int): Number of attention heads.\n        window_size (int): Local window size.\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True\n        drop (float, optional): Dropout rate. Default: 0.0\n        attn_drop (float, optional): Attention dropout rate. Default: 0.0\n        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0\n        downsample (layer.Layer | None, optional): Downsample layer at the end of the layer. 
Default: None\n    \"\"\"\n\n    def __init__(self, dim, depth, num_heads, window_size,\n                 mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,\n                 drop_path=0., downsample=None, name=None):\n        super().__init__(name=name)\n        self.dim = dim\n        self.depth = depth\n        self.window_size = window_size\n        self.shift_size = window_size // 2\n\n        # build blocks\n        self.blocks = [\n            SwinTransformerBlock(dim=dim,\n                                 num_heads=num_heads,\n                                 window_size=window_size,\n                                 shift_size=0 if (i % 2 == 0) else self.shift_size,\n                                 mlp_ratio=mlp_ratio,\n                                 qkv_bias=qkv_bias,\n                                 drop=drop,\n                                 attn_drop=attn_drop,\n                                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,\n                                 name=f\"block{i}\")\n            for i in range(depth)\n        ]\n\n        # patch merging layer\n        if downsample is not None:\n            self.downsample = downsample(dim=dim, name=\"downsample\")\n        else:\n            self.downsample = None\n\n    def create_mask(self, H, W):\n        # calculate attention mask for SW-MSA\n        # 保证Hp和Wp是window_size的整数倍\n        Hp = int(np.ceil(H / self.window_size)) * self.window_size\n        Wp = int(np.ceil(W / self.window_size)) * self.window_size\n        # 拥有和feature map一样的通道排列顺序，方便后续window_partition\n        img_mask = np.zeros([1, Hp, Wp, 1])  # [1, Hp, Wp, 1]\n        h_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n        w_slices = (slice(0, -self.window_size),\n                    slice(-self.window_size, -self.shift_size),\n                    slice(-self.shift_size, None))\n\n 
       cnt = 0\n        for h in h_slices:\n            for w in w_slices:\n                img_mask[:, h, w, :] = cnt\n                cnt += 1\n\n        img_mask = tf.convert_to_tensor(img_mask, dtype=tf.float32)\n        mask_windows = window_partition(img_mask, self.window_size)  # [nW, Mh, Mw, 1]\n        mask_windows = tf.reshape(mask_windows, [-1, self.window_size * self.window_size])  # [nW, Mh*Mw]\n        # [nW, 1, Mh*Mw] - [nW, Mh*Mw, 1]\n        attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2)\n        attn_mask = tf.where(attn_mask != 0, -100.0, attn_mask)\n        attn_mask = tf.where(attn_mask == 0, 0.0, attn_mask)\n\n        return attn_mask\n\n    def call(self, x, H, W, training=None):\n        attn_mask = self.create_mask(H, W)  # [nW, Mh*Mw, Mh*Mw]\n        for blk in self.blocks:\n            blk.H, blk.W = H, W\n            x = blk(x, attn_mask, training=training)\n\n        if self.downsample is not None:\n            x = self.downsample(x, H, W)\n            H, W = (H + 1) // 2, (W + 1) // 2\n\n        return x, H, W\n\n\nclass SwinTransformer(Model):\n    r\"\"\" Swin Transformer\n        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -\n          https://arxiv.org/pdf/2103.14030\n\n    Args:\n        patch_size (int | tuple(int)): Patch size. Default: 4\n        num_classes (int): Number of classes for classification head. Default: 1000\n        embed_dim (int): Patch embedding dimension. Default: 96\n        depths (tuple(int)): Depth of each Swin Transformer layer.\n        num_heads (tuple(int)): Number of attention heads in different layers.\n        window_size (int): Window size. Default: 7\n        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4\n        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True\n        drop_rate (float): Dropout rate. 
Default: 0\n        attn_drop_rate (float): Attention dropout rate. Default: 0\n        drop_path_rate (float): Stochastic depth rate. Default: 0.1\n        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.\n        patch_norm (bool): If True, add normalization after patch embedding. Default: True\n        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False\n    \"\"\"\n\n    def __init__(self, patch_size=4, num_classes=1000,\n                 embed_dim=96, depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24),\n                 window_size=7, mlp_ratio=4., qkv_bias=True,\n                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,\n                 norm_layer=layers.LayerNormalization, name=None, **kwargs):\n        super().__init__(name=name)\n\n        self.num_classes = num_classes\n        self.num_layers = len(depths)\n        self.embed_dim = embed_dim\n        self.mlp_ratio = mlp_ratio\n\n        # split image into non-overlapping patches\n        self.patch_embed = PatchEmbed(patch_size=patch_size,\n                                      embed_dim=embed_dim,\n                                      norm_layer=norm_layer)\n        self.pos_drop = layers.Dropout(drop_rate)\n\n        # stochastic depth decay rule\n        dpr = [x for x in np.linspace(0, drop_path_rate, sum(depths))]\n\n        # build layers\n        self.stage_layers = []\n        for i_layer in range(self.num_layers):\n            # 注意这里构建的stage和论文图中有些差异\n            # 这里的stage不包含该stage的patch_merging层，包含的是下个stage的\n            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),\n                               depth=depths[i_layer],\n                               num_heads=num_heads[i_layer],\n                               window_size=window_size,\n                               mlp_ratio=self.mlp_ratio,\n                               qkv_bias=qkv_bias,\n                               drop=drop_rate,\n                               
attn_drop=attn_drop_rate,\n                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],\n                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,\n                               name=f\"layer{i_layer}\")\n            self.stage_layers.append(layer)\n\n        self.norm = norm_layer(epsilon=1e-6, name=\"norm\")\n        self.head = layers.Dense(num_classes,\n                                 kernel_initializer=initializers.TruncatedNormal(stddev=0.02),\n                                 bias_initializer=initializers.Zeros(),\n                                 name=\"head\")\n\n    def call(self, x, training=None):\n        x, H, W = self.patch_embed(x)  # x: [B, L, C]\n        x = self.pos_drop(x, training=training)\n\n        for layer in self.stage_layers:\n            x, H, W = layer(x, H, W, training=training)\n\n        x = self.norm(x)  # [B, L, C]\n        x = tf.reduce_mean(x, axis=1)\n        x = self.head(x)\n\n        return x\n\n\ndef swin_tiny_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    model = SwinTransformer(patch_size=4,\n                            window_size=7,\n                            embed_dim=96,\n                            depths=(2, 2, 6, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            name=\"swin_tiny_patch4_window7\",\n                            **kwargs)\n    return model\n\n\ndef swin_small_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=96,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(3, 6, 12, 24),\n                            num_classes=num_classes,\n                            name=\"swin_small_patch4_window7\",\n                  
          **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224(num_classes: int = 1000, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            name=\"swin_base_patch4_window7\",\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384(num_classes: int = 1000, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            name=\"swin_base_patch4_window12\",\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n                            num_classes=num_classes,\n                            name=\"swin_base_patch4_window7\",\n                            **kwargs)\n    return model\n\n\ndef swin_base_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=128,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(4, 8, 16, 32),\n         
                   num_classes=num_classes,\n                            name=\"swin_base_patch4_window12\",\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window7_224_in22k(num_classes: int = 21841, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=7,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            name=\"swin_large_patch4_window7\",\n                            **kwargs)\n    return model\n\n\ndef swin_large_patch4_window12_384_in22k(num_classes: int = 21841, **kwargs):\n    model = SwinTransformer(in_chans=3,\n                            patch_size=4,\n                            window_size=12,\n                            embed_dim=192,\n                            depths=(2, 2, 18, 2),\n                            num_heads=(6, 12, 24, 48),\n                            num_classes=num_classes,\n                            name=\"swin_large_patch4_window12\",\n                            **kwargs)\n    return model\n"
  },
  {
    "path": "tensorflow_classification/swin_transformer/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\nfrom model import swin_tiny_patch4_window7_224 as create_model\n\n\ndef main():\n    num_classes = 5\n    im_height = im_width = 224\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # read image\n    img = np.array(img).astype(np.float32)\n\n    # preprocess\n    img = (img / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n    model.build([1, im_height, im_width, 3])\n\n    weights_path = './save_weights/model.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img, batch_size=1))\n    result = tf.keras.layers.Softmax()(result)\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/swin_transformer/train.py",
    "content": "import os\nimport re\nimport datetime\nimport sys\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom model import swin_tiny_patch4_window7_224 as create_model\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    img_size = 224\n    batch_size = 8\n    epochs = 10\n    num_classes = 5\n    freeze_layers = False\n    initial_lr = 0.0001\n    weight_decay = 1e-5\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root,\n                                   train_im_width=img_size,\n                                   train_im_height=img_size,\n                                   batch_size=batch_size,\n                                   val_rate=0.2)\n\n    # create model\n    model = create_model(num_classes=num_classes)\n    model.build((1, img_size, img_size, 3))\n\n    # 下载我提前转好的预训练权重\n    # 链接: https://pan.baidu.com/s/1cHVwia2i3wD7-0Ueh2WmrQ  密码: sq8c\n    # load weights\n    pre_weights_path = './swin_tiny_patch4_window7_224.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    # freeze bottom layers\n    if freeze_layers:\n        for layer in model.layers:\n            if \"head\" not in layer.name:\n                layer.trainable = False\n            else:\n                print(\"training {}\".format(layer.name))\n\n    model.summary()\n\n    # using keras low level api for training\n    
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n    optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            # cross entropy loss\n            ce_loss = loss_object(train_labels, output)\n\n            # l2 loss\n            matcher = re.compile(\".*(bias|gamma|beta).*\")\n            l2loss = weight_decay * tf.add_n([\n                tf.nn.l2_loss(v)\n                for v in model.trainable_variables\n                if not matcher.match(v.name)\n            ])\n\n            loss = ce_loss + l2loss\n\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n        train_loss(ce_loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train 
epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            save_name = \"./save_weights/model.ckpt\"\n            model.save_weights(save_name, save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/swin_transformer/trans_weights.py",
    "content": "import torch\nfrom model import *\n\n\ndef main(weights_path: str,\n         model_name: str,\n         model: tf.keras.Model):\n    var_dict = {v.name.split(':')[0]: v for v in model.weights}\n\n    weights_dict = torch.load(weights_path, map_location=\"cpu\")[\"model\"]\n    w_dict = {}\n    for k, v in weights_dict.items():\n        if \"patch_embed\" in k:\n            k = k.replace(\".\", \"/\")\n            if \"proj\" in k:\n                k = k.replace(\"proj/weight\", \"proj/kernel\")\n                if len(v.shape) > 1:\n                    # conv weights\n                    v = np.transpose(v.numpy(), (2, 3, 1, 0)).astype(np.float32)\n                    w_dict[k] = v\n                else:\n                    # bias\n                    w_dict[k] = v\n            elif \"norm\" in k:\n                k = k.replace(\"weight\", \"gamma\").replace(\"bias\", \"beta\")\n                w_dict[k] = v\n        elif \"layers\" in k:\n            k = k.replace(\"layers\", \"layer\")\n            split_k = k.split(\".\")\n            layer_id = split_k[0] + split_k[1]\n            if \"block\" in k:\n                split_k[2] = \"block\"\n                black_id = split_k[2] + split_k[3]\n                k = \"/\".join([layer_id, black_id, *split_k[4:]])\n                if \"attn\" in k or \"mlp\" in k:\n                    k = k.replace(\"weight\", \"kernel\")\n                    if \"kernel\" in k:\n                        v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)\n                elif \"norm\" in k:\n                    k = k.replace(\"weight\", \"gamma\").replace(\"bias\", \"beta\")\n                w_dict[k] = v\n            elif \"downsample\" in k:\n                k = \"/\".join([layer_id, *split_k[2:]])\n                if \"reduction\" in k:\n                    k = k.replace(\"weight\", \"kernel\")\n                    if \"kernel\" in k:\n                        v = np.transpose(v.numpy(), (1, 
0)).astype(np.float32)\n                elif \"norm\" in k:\n                    k = k.replace(\"weight\", \"gamma\").replace(\"bias\", \"beta\")\n                w_dict[k] = v\n        elif \"norm\" in k:\n            k = k.replace(\".\", \"/\").replace(\"weight\", \"gamma\").replace(\"bias\", \"beta\")\n            w_dict[k] = v\n        elif \"head\" in k:\n            k = k.replace(\".\", \"/\")\n            k = k.replace(\"weight\", \"kernel\")\n            if \"kernel\" in k:\n                v = np.transpose(v.numpy(), (1, 0)).astype(np.float32)\n            w_dict[k] = v\n\n    for key, var in var_dict.items():\n        if key in w_dict:\n            if w_dict[key].shape != var.shape:\n                msg = \"shape mismatch: {}\".format(key)\n                print(msg)\n            else:\n                var.assign(w_dict[key], read_value=False)\n        else:\n            msg = \"Not found {} in {}\".format(key, weights_path)\n            print(msg)\n\n    model.save_weights(\"./{}.h5\".format(model_name))\n\n\nif __name__ == '__main__':\n    model = swin_tiny_patch4_window7_224()\n    model.build((1, 224, 224, 3))\n    # trained ImageNet-1K\n    # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth\n    main(weights_path=\"./swin_tiny_patch4_window7_224.pth\",\n         model_name=\"swin_tiny_patch4_window7_224\",\n         model=model)\n\n    # model = swin_small_patch4_window7_224()\n    # model.build((1, 224, 224, 3))\n    # # trained ImageNet-1K\n    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth\n    # main(weights_path=\"./swin_small_patch4_window7_224.pth\",\n    #      model_name=\"swin_small_patch4_window7_224\",\n    #      model=model)\n\n    # model = swin_base_patch4_window7_224()\n    # model.build((1, 224, 224, 3))\n    # # trained ImageNet-1K\n    # # 
https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth\n    # main(weights_path=\"./swin_base_patch4_window7_224.pth\",\n    #      model_name=\"swin_base_patch4_window7_224\",\n    #      model=model)\n\n    # model = swin_base_patch4_window12_384()\n    # model.build((1, 384, 384, 3))\n    # # trained ImageNet-1K\n    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth\n    # main(weights_path=\"./swin_base_patch4_window12_384.pth\",\n    #      model_name=\"swin_base_patch4_window12_384\",\n    #      model=model)\n\n    # model = swin_base_patch4_window7_224_in22k()\n    # model.build((1, 224, 224, 3))\n    # # trained ImageNet-22K\n    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth\n    # main(weights_path=\"./swin_base_patch4_window7_224_22k.pth\",\n    #      model_name=\"swin_base_patch4_window7_224_22k\",\n    #      model=model)\n\n    # model = swin_base_patch4_window12_384_in22k()\n    # model.build((1, 384, 384, 3))\n    # # trained ImageNet-22K\n    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth\n    # main(weights_path=\"./swin_base_patch4_window12_384_22k.pth\",\n    #      model_name=\"swin_base_patch4_window12_384_22k\",\n    #      model=model)\n\n    # model = swin_large_patch4_window7_224_in22k()\n    # model.build((1, 224, 224, 3))\n    # # trained ImageNet-22K\n    # # https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth\n    # main(weights_path=\"./swin_large_patch4_window7_224_22k.pth\",\n    #      model_name=\"swin_large_patch4_window7_224_22k\",\n    #      model=model)\n\n    # model = swin_large_patch4_window12_384_in22k()\n    # model.build((1, 384, 384, 3))\n    # # trained ImageNet-22K\n    # # 
https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth\n    # main(weights_path=\"./swin_large_patch4_window12_384_22k.pth\",\n    #      model_name=\"swin_large_patch4_window12_384_22k\",\n    #      model=model)\n"
  },
  {
    "path": "tensorflow_classification/swin_transformer/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                train_im_height: int = 224,\n                train_im_width: int = 224,\n                val_im_height: int = None,\n                val_im_width: int = None,\n                batch_size: int = 8,\n                val_rate: float = 0.1,\n                cache_data: bool = False):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param train_im_height: 训练输入网络图像的高度\n    :param train_im_width:  训练输入网络图像的宽度\n    :param val_im_height: 验证输入网络图像的高度\n    :param val_im_width:  验证输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :param cache_data: 是否缓存数据\n    :return:\n    \"\"\"\n    assert train_im_height is not None\n    assert train_im_width is not None\n    if val_im_width is None:\n        val_im_width = train_im_width\n    if val_im_height is None:\n        val_im_height = train_im_height\n\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    
AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, train_im_height, train_im_width)\n        image = tf.image.random_flip_left_right(image)\n        image = (image / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, val_im_height, val_im_width)\n        image = (image / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False,\n                                  cache: bool = False):\n        if cache:\n            ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                
                 tf.constant(val_img_label)))\n    total_val = len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val, cache=False)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/tensorboard_test/train_fit.py",
    "content": "import json\nimport os\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    num_classes = 5\n    im_height = 224\n    im_width = 224\n    batch_size = 8\n    epochs = 20\n    log_dir = \"logs/fit/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True)\n\n    validation_image_generator = ImageDataGenerator()\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        
json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    # create base model\n    base_model = tf.keras.applications.ResNet50(include_top=False,\n                                                input_shape=(224, 224, 3),\n                                                weights='imagenet')\n    # freeze base model\n    base_model.trainable = False\n    base_model.summary()\n\n    # create new model on top\n    inputs = tf.keras.Input(shape=(224, 224, 3))\n    x = tf.keras.applications.resnet50.preprocess_input(inputs)\n    x = base_model(x, training=False)\n    x = tf.keras.layers.GlobalAveragePooling2D()(x)\n    outputs = tf.keras.layers.Dense(num_classes)(x)\n    model = tf.keras.Model(inputs, outputs)\n    model.summary()\n\n    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),\n                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),\n                  metrics=[tf.keras.metrics.CategoricalAccuracy(\"accuracy\")])\n\n    # 自定义学习率变化\n    def scheduler(epoch):\n        initial_lr = 0.01\n        end_lr = 0.001\n        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine\n        new_lr = rate * initial_lr\n\n        return new_lr\n\n    callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='./save_weights/model_{epoch}.h5',\n      
                                              save_best_only=True,\n                                                    save_weights_only=True,\n                                                    monitor=tf.keras.metrics.CategoricalAccuracy(\"accuracy\").name),\n                 tf.keras.callbacks.TensorBoard(log_dir=log_dir,\n                                                write_graph=True,\n                                                histogram_freq=1),\n                 tf.keras.callbacks.LearningRateScheduler(schedule=scheduler)]\n\n    model.fit(x=train_data_gen,\n              epochs=epochs,\n              validation_data=val_data_gen,\n              callbacks=callbacks)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/tensorboard_test/train_not_fit.py",
    "content": "import json\nimport os\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nfrom tqdm import tqdm\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = os.path.abspath(os.path.join(os.getcwd(), \"../..\"))  # get data root path\n    image_path = os.path.join(data_root, \"data_set\", \"flower_data\")  # flower data set path\n    train_dir = os.path.join(image_path, \"train\")\n    validation_dir = os.path.join(image_path, \"val\")\n    assert os.path.exists(train_dir), \"cannot find {}\".format(train_dir)\n    assert os.path.exists(validation_dir), \"cannot find {}\".format(validation_dir)\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    num_classes = 5\n    im_height = 224\n    im_width = 224\n    batch_size = 16\n    epochs = 20\n    log_dir = \"./logs/not_fit/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_image_generator = ImageDataGenerator(horizontal_flip=True)\n\n    validation_image_generator = ImageDataGenerator()\n\n    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,\n                                                               batch_size=batch_size,\n                                                               shuffle=True,\n                                                               target_size=(im_height, im_width),\n                                                               class_mode='categorical')\n    total_train = train_data_gen.n\n\n    # get class dict\n    class_indices = train_data_gen.class_indices\n\n    # transform value and key of dict\n    inverse_dict = dict((val, 
key) for key, val in class_indices.items())\n    # write dict into json file\n    json_str = json.dumps(inverse_dict, indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,\n                                                                  batch_size=batch_size,\n                                                                  shuffle=False,\n                                                                  target_size=(im_height, im_width),\n                                                                  class_mode='categorical')\n\n    total_val = val_data_gen.n\n    print(\"using {} images for training, {} images for validation.\".format(total_train,\n                                                                           total_val))\n\n    # create base model\n    base_model = tf.keras.applications.ResNet50(include_top=False,\n                                                input_shape=(224, 224, 3),\n                                                weights='imagenet')\n    # freeze base model\n    base_model.trainable = False\n    base_model.summary()\n\n    # create new model on top\n    inputs = tf.keras.Input(shape=(224, 224, 3))\n    x = tf.keras.applications.resnet50.preprocess_input(inputs)\n    x = base_model(x, training=False)\n    x = tf.keras.layers.GlobalAveragePooling2D()(x)\n    outputs = tf.keras.layers.Dense(num_classes)(x)\n    model = tf.keras.Model(inputs, outputs)\n    model.summary()\n\n    # 自定义学习率变化\n    def scheduler(epoch):\n        initial_lr = 0.01\n        end_lr = 0.001\n        rate = ((1 + math.cos(epoch * math.pi / epochs)) / 2) * (1 - end_lr) + end_lr  # cosine\n        new_lr = rate * initial_lr\n\n        with train_writer.as_default():\n            tf.summary.scalar('learning rate', data=new_lr, step=epoch)\n\n        return new_lr\n\n    # using keras low level api for training\n   
 loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(images, labels):\n        with tf.GradientTape() as tape:\n            output = model(images, training=True)\n            loss = loss_object(labels, output)\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n\n        train_loss(loss)\n        train_accuracy(labels, output)\n\n    @tf.function\n    def test_step(images, labels):\n        output = model(images, training=False)\n        t_loss = loss_object(labels, output)\n\n        val_loss(t_loss)\n        val_accuracy(labels, output)\n\n    best_val_accuracy = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        val_accuracy.reset_states()  # clear history info\n\n        print(\"Epoch [{}/{}]\".format(epoch + 1, epochs))\n        # train\n        train_bar = tqdm(train_data_gen, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train_loss:{:.3f}, train_acc:{:.3f}\".format(train_loss.result(),\n                                                                          train_accuracy.result())\n\n        # update learning rate\n        optimizer.learning_rate = scheduler(epoch)\n\n        # validation\n        val_bar = tqdm(val_data_gen, file=sys.stdout)\n        for test_images, 
test_labels in val_bar:\n            test_step(test_images, test_labels)\n\n            # print val process\n            val_bar.desc = \"val_loss:{:.3f}, val_acc:{:.3f}\".format(val_loss.result(),\n                                                                    val_accuracy.result())\n\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        if val_accuracy.result() > best_val_accuracy:\n            best_val_accuracy = val_accuracy.result()\n            model.save_weights(\"./save_weights/model_{}.ckpt\".format(epoch), save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/vision_transformer/predict.py",
    "content": "import os\nimport json\nimport glob\nimport numpy as np\n\nfrom PIL import Image\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\nfrom vit_model import vit_base_patch16_224_in21k as create_model\n\n\ndef main():\n    num_classes = 5\n    im_height = im_width = 224\n\n    # load image\n    img_path = \"../tulip.jpg\"\n    assert os.path.exists(img_path), \"file: '{}' dose not exist.\".format(img_path)\n    img = Image.open(img_path)\n    # resize image\n    img = img.resize((im_width, im_height))\n    plt.imshow(img)\n\n    # read image\n    img = np.array(img).astype(np.float32)\n\n    # preprocess\n    img = (img / 255. - 0.5) / 0.5\n\n    # Add the image to a batch where it's the only member.\n    img = (np.expand_dims(img, 0))\n\n    # read class_indict\n    json_path = './class_indices.json'\n    assert os.path.exists(json_path), \"file: '{}' dose not exist.\".format(json_path)\n\n    with open(json_path, \"r\") as f:\n        class_indict = json.load(f)\n\n    # create model\n    model = create_model(num_classes=num_classes, has_logits=False)\n    model.build([1, 224, 224, 3])\n\n    weights_path = './save_weights/model.ckpt'\n    assert len(glob.glob(weights_path+\"*\")), \"cannot find {}\".format(weights_path)\n    model.load_weights(weights_path)\n\n    result = np.squeeze(model.predict(img, batch_size=1))\n    result = tf.keras.layers.Softmax()(result)\n    predict_class = np.argmax(result)\n\n    print_res = \"class: {}   prob: {:.3}\".format(class_indict[str(predict_class)],\n                                                 result[predict_class])\n    plt.title(print_res)\n    for i in range(len(result)):\n        print(\"class: {:10}   prob: {:.3}\".format(class_indict[str(i)],\n                                                  result[i]))\n    plt.show()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/vision_transformer/train.py",
    "content": "import os\nimport re\nimport sys\nimport math\nimport datetime\n\nimport tensorflow as tf\nfrom tqdm import tqdm\n\nfrom vit_model import vit_base_patch16_224_in21k as create_model\nfrom utils import generate_ds\n\nassert tf.version.VERSION >= \"2.4.0\", \"version of tf must greater/equal than 2.4.0\"\n\n\ndef main():\n    data_root = \"/data/flower_photos\"  # get data root path\n\n    if not os.path.exists(\"./save_weights\"):\n        os.makedirs(\"./save_weights\")\n\n    batch_size = 8\n    epochs = 10\n    num_classes = 5\n    freeze_layers = True\n    initial_lr = 0.001\n    weight_decay = 1e-4\n\n    log_dir = \"./logs/\" + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"train\"))\n    val_writer = tf.summary.create_file_writer(os.path.join(log_dir, \"val\"))\n\n    # data generator with data augmentation\n    train_ds, val_ds = generate_ds(data_root, batch_size=batch_size, val_rate=0.2)\n\n    # create model\n    model = create_model(num_classes=num_classes, has_logits=False)\n    model.build((1, 224, 224, 3))\n\n    # 下载我提前转好的预训练权重\n    # 链接: https://pan.baidu.com/s/1ro-6bebc8zroYfupn-7jVQ  密码: s9d9\n    # load weights\n    pre_weights_path = './ViT-B_16.h5'\n    assert os.path.exists(pre_weights_path), \"cannot find {}\".format(pre_weights_path)\n    model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)\n\n    # freeze bottom layers\n    if freeze_layers:\n        for layer in model.layers:\n            if \"pre_logits\" not in layer.name and \"head\" not in layer.name:\n                layer.trainable = False\n            else:\n                print(\"training {}\".format(layer.name))\n\n    model.summary()\n\n    # custom learning rate curve\n    def scheduler(now_epoch):\n        end_lr_rate = 0.01  # end_lr = initial_lr * end_lr_rate\n        rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate  # 
cosine\n        new_lr = rate * initial_lr\n\n        # writing lr into tensorboard\n        with train_writer.as_default():\n            tf.summary.scalar('learning rate', data=new_lr, step=epoch)\n\n        return new_lr\n\n    # using keras low level api for training\n    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)\n\n    train_loss = tf.keras.metrics.Mean(name='train_loss')\n    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')\n\n    val_loss = tf.keras.metrics.Mean(name='val_loss')\n    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')\n\n    @tf.function\n    def train_step(train_images, train_labels):\n        with tf.GradientTape() as tape:\n            output = model(train_images, training=True)\n            # cross entropy loss\n            ce_loss = loss_object(train_labels, output)\n\n            # l2 loss\n            matcher = re.compile(\".*(bias|gamma|beta).*\")\n            l2loss = weight_decay * tf.add_n([\n                tf.nn.l2_loss(v)\n                for v in model.trainable_variables\n                if not matcher.match(v.name)\n            ])\n\n            loss = ce_loss + l2loss\n\n        gradients = tape.gradient(loss, model.trainable_variables)\n        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n        train_loss(ce_loss)\n        train_accuracy(train_labels, output)\n\n    @tf.function\n    def val_step(val_images, val_labels):\n        output = model(val_images, training=False)\n        loss = loss_object(val_labels, output)\n\n        val_loss(loss)\n        val_accuracy(val_labels, output)\n\n    best_val_acc = 0.\n    for epoch in range(epochs):\n        train_loss.reset_states()  # clear history info\n        train_accuracy.reset_states()  # clear history info\n        val_loss.reset_states()  # clear history info\n        
val_accuracy.reset_states()  # clear history info\n\n        # train\n        train_bar = tqdm(train_ds, file=sys.stdout)\n        for images, labels in train_bar:\n            train_step(images, labels)\n\n            # print train process\n            train_bar.desc = \"train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                                 epochs,\n                                                                                 train_loss.result(),\n                                                                                 train_accuracy.result())\n\n        # update learning rate\n        optimizer.learning_rate = scheduler(epoch)\n\n        # validate\n        val_bar = tqdm(val_ds, file=sys.stdout)\n        for images, labels in val_bar:\n            val_step(images, labels)\n\n            # print val process\n            val_bar.desc = \"valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}\".format(epoch + 1,\n                                                                               epochs,\n                                                                               val_loss.result(),\n                                                                               val_accuracy.result())\n        # writing training loss and acc\n        with train_writer.as_default():\n            tf.summary.scalar(\"loss\", train_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", train_accuracy.result(), epoch)\n\n        # writing validation loss and acc\n        with val_writer.as_default():\n            tf.summary.scalar(\"loss\", val_loss.result(), epoch)\n            tf.summary.scalar(\"accuracy\", val_accuracy.result(), epoch)\n\n        # only save best weights\n        if val_accuracy.result() > best_val_acc:\n            best_val_acc = val_accuracy.result()\n            save_name = \"./save_weights/model.ckpt\"\n            model.save_weights(save_name, 
save_format=\"tf\")\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tensorflow_classification/vision_transformer/trans_weights.py",
    "content": "from vit_model import *\n\n\ndef main(weights_path: str,\n         model_name: str,\n         model: tf.keras.Model):\n    var_dict = {v.name.split(':')[0]: v for v in model.weights}\n\n    ckpt_dict = np.load(weights_path, allow_pickle=False)\n    # keys, values = zip(*list(ckpt_dict.items()))\n    w_dict = {}\n    for k, v in ckpt_dict.items():\n        key_ = k.replace(\"Transformer/\", \"\").\\\n            replace(\"MultiHeadDotProductAttention_1\", \"MultiHeadAttention\").\\\n            replace(\"MlpBlock_3\", \"MlpBlock\").\\\n            replace(\"posembed_input/pos_embedding\", \"pos_embed\").\\\n            replace(\"encoder_norm/bias\", \"encoder_norm/beta\").\\\n            replace(\"encoder_norm/scale\", \"encoder_norm/gamma\").\\\n            replace(\"LayerNorm_0/bias\", \"LayerNorm_0/beta\").\\\n            replace(\"LayerNorm_0/scale\", \"LayerNorm_0/gamma\"). \\\n            replace(\"LayerNorm_2/bias\", \"LayerNorm_1/beta\"). \\\n            replace(\"LayerNorm_2/scale\", \"LayerNorm_1/gamma\").\\\n            replace(\"embedding\", \"patch_embed/conv2d\")\n        w_dict[key_] = v\n\n    for i in range(model.depth):\n        q_kernel = w_dict.pop(\"encoderblock_{}/MultiHeadAttention/query/kernel\".format(i))\n        k_kernel = w_dict.pop(\"encoderblock_{}/MultiHeadAttention/key/kernel\".format(i))\n        v_kernel = w_dict.pop(\"encoderblock_{}/MultiHeadAttention/value/kernel\".format(i))\n        q_kernel = np.reshape(q_kernel, [q_kernel.shape[0], -1])\n        k_kernel = np.reshape(k_kernel, [k_kernel.shape[0], -1])\n        v_kernel = np.reshape(v_kernel, [v_kernel.shape[0], -1])\n        qkv_kernel = np.concatenate([q_kernel, k_kernel, v_kernel], axis=1)\n        w_dict[\"encoderblock_{}/MultiHeadAttention/qkv/kernel\".format(i)] = qkv_kernel\n\n        if model.qkv_bias:\n            q_bias = w_dict.pop(\"encoderblock_{}/MultiHeadAttention/query/bias\".format(i))\n            k_bias = 
w_dict.pop(\"encoderblock_{}/MultiHeadAttention/key/bias\".format(i))\n            v_bias = w_dict.pop(\"encoderblock_{}/MultiHeadAttention/value/bias\".format(i))\n            q_bias = np.reshape(q_bias, [-1])\n            k_bias = np.reshape(k_bias, [-1])\n            v_bias = np.reshape(v_bias, [-1])\n            qkv_bias = np.concatenate([q_bias, k_bias, v_bias], axis=0)\n            w_dict[\"encoderblock_{}/MultiHeadAttention/qkv/bias\".format(i)] = qkv_bias\n\n        out_kernel = w_dict[\"encoderblock_{}/MultiHeadAttention/out/kernel\".format(i)]\n        out_kernel = np.reshape(out_kernel, [-1, out_kernel.shape[-1]])\n        w_dict[\"encoderblock_{}/MultiHeadAttention/out/kernel\".format(i)] = out_kernel\n\n    for key, var in var_dict.items():\n        if key in w_dict:\n            if w_dict[key].shape != var.shape:\n                msg = \"shape mismatch: {}\".format(key)\n                print(msg)\n            else:\n                var.assign(w_dict[key], read_value=False)\n        else:\n            msg = \"Not found {} in {}\".format(key, weights_path)\n            print(msg)\n\n    model.save_weights(\"./{}.h5\".format(model_name))\n\n\nif __name__ == '__main__':\n    model = vit_base_patch16_224_in21k()\n    model.build((1, 224, 224, 3))\n    # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz\n    main(weights_path=\"./ViT-B_16.npz\",\n         model_name=\"ViT-B_16\",\n         model=model)\n\n    # model = vit_base_patch32_224_in21k()\n    # model.build((1, 224, 224, 3))\n    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_32.npz\n    # main(weights_path=\"./ViT-B_32.npz\",\n    #      model_name=\"ViT-B_32\",\n    #      model=model)\n\n    # model = vit_large_patch16_224_in21k()\n    # model.build((1, 224, 224, 3))\n    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_16.npz\n    # main(weights_path=\"./ViT-L_16.npz\",\n    #      model_name=\"ViT-L_16\",\n    #      model=model)\n\n    # model 
= vit_large_patch32_224_in21k()\n    # model.build((1, 224, 224, 3))\n    # # https://storage.googleapis.com/vit_models/imagenet21k/ViT-L_32.npz\n    # main(weights_path=\"./ViT-L_32.npz\",\n    #      model_name=\"ViT-L_32\",\n    #      model=model)\n"
  },
  {
    "path": "tensorflow_classification/vision_transformer/utils.py",
    "content": "import os\nimport json\nimport random\n\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\n\ndef read_split_data(root: str, val_rate: float = 0.2):\n    random.seed(0)  # 保证随机划分结果一致\n    assert os.path.exists(root), \"dataset root: {} does not exist.\".format(root)\n\n    # 遍历文件夹，一个文件夹对应一个类别\n    flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]\n    # 排序，保证顺序一致\n    flower_class.sort()\n    # 生成类别名称以及对应的数字索引\n    class_indices = dict((k, v) for v, k in enumerate(flower_class))\n    json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)\n    with open('class_indices.json', 'w') as json_file:\n        json_file.write(json_str)\n\n    train_images_path = []  # 存储训练集的所有图片路径\n    train_images_label = []  # 存储训练集图片对应索引信息\n    val_images_path = []  # 存储验证集的所有图片路径\n    val_images_label = []  # 存储验证集图片对应索引信息\n    every_class_num = []  # 存储每个类别的样本总数\n    supported = [\".jpg\", \".JPG\", \".jpeg\", \".JPEG\"]  # 支持的文件后缀类型\n    # 遍历每个文件夹下的文件\n    for cla in flower_class:\n        cla_path = os.path.join(root, cla)\n        # 遍历获取supported支持的所有文件路径\n        images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)\n                  if os.path.splitext(i)[-1] in supported]\n        # 获取该类别对应的索引\n        image_class = class_indices[cla]\n        # 记录该类别的样本数量\n        every_class_num.append(len(images))\n        # 按比例随机采样验证样本\n        val_path = random.sample(images, k=int(len(images) * val_rate))\n\n        for img_path in images:\n            if img_path in val_path:  # 如果该路径在采样的验证集样本中则存入验证集\n                val_images_path.append(img_path)\n                val_images_label.append(image_class)\n            else:  # 否则存入训练集\n                train_images_path.append(img_path)\n                train_images_label.append(image_class)\n\n    print(\"{} images were found in the dataset.\\n{} for training, {} for validation\".format(sum(every_class_num),\n                       
                                                                     len(train_images_path),\n                                                                                            len(val_images_path)\n                                                                                            ))\n\n    plot_image = False\n    if plot_image:\n        # 绘制每种类别个数柱状图\n        plt.bar(range(len(flower_class)), every_class_num, align='center')\n        # 将横坐标0,1,2,3,4替换为相应的类别名称\n        plt.xticks(range(len(flower_class)), flower_class)\n        # 在柱状图上添加数值标签\n        for i, v in enumerate(every_class_num):\n            plt.text(x=i, y=v + 5, s=str(v), ha='center')\n        # 设置x坐标\n        plt.xlabel('image class')\n        # 设置y坐标\n        plt.ylabel('number of images')\n        # 设置柱状图的标题\n        plt.title('flower class distribution')\n        plt.show()\n\n    return train_images_path, train_images_label, val_images_path, val_images_label\n\n\ndef generate_ds(data_root: str,\n                train_im_height: int = 224,\n                train_im_width: int = 224,\n                val_im_height: int = None,\n                val_im_width: int = None,\n                batch_size: int = 8,\n                val_rate: float = 0.1,\n                cache_data: bool = False):\n    \"\"\"\n    读取划分数据集，并生成训练集和验证集的迭代器\n    :param data_root: 数据根目录\n    :param train_im_height: 训练输入网络图像的高度\n    :param train_im_width:  训练输入网络图像的宽度\n    :param val_im_height: 验证输入网络图像的高度\n    :param val_im_width:  验证输入网络图像的宽度\n    :param batch_size: 训练使用的batch size\n    :param val_rate:  将数据按给定比例划分到验证集\n    :param cache_data: 是否缓存数据\n    :return:\n    \"\"\"\n    assert train_im_height is not None\n    assert train_im_width is not None\n    if val_im_width is None:\n        val_im_width = train_im_width\n    if val_im_height is None:\n        val_im_height = train_im_height\n\n    train_img_path, train_img_label, val_img_path, val_img_label = read_split_data(data_root, val_rate=val_rate)\n    
AUTOTUNE = tf.data.experimental.AUTOTUNE\n\n    def process_train_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, train_im_height, train_im_width)\n        image = tf.image.random_flip_left_right(image)\n        image = (image / 255. - 0.5) / 0.5\n        return image, label\n\n    def process_val_info(img_path, label):\n        image = tf.io.read_file(img_path)\n        image = tf.image.decode_jpeg(image, channels=3)\n        image = tf.cast(image, tf.float32)\n        image = tf.image.resize_with_crop_or_pad(image, val_im_height, val_im_width)\n        image = (image / 255. - 0.5) / 0.5\n        return image, label\n\n    # Configure dataset for performance\n    def configure_for_performance(ds,\n                                  shuffle_size: int,\n                                  shuffle: bool = False,\n                                  cache: bool = False):\n        if cache:\n            ds = ds.cache()  # 读取数据后缓存至内存\n        if shuffle:\n            ds = ds.shuffle(buffer_size=shuffle_size)  # 打乱数据顺序\n        ds = ds.batch(batch_size)                      # 指定batch size\n        ds = ds.prefetch(buffer_size=AUTOTUNE)         # 在训练的同时提前准备下一个step的数据\n        return ds\n\n    train_ds = tf.data.Dataset.from_tensor_slices((tf.constant(train_img_path),\n                                                   tf.constant(train_img_label)))\n    total_train = len(train_img_path)\n\n    # Use Dataset.map to create a dataset of image, label pairs\n    train_ds = train_ds.map(process_train_info, num_parallel_calls=AUTOTUNE)\n    train_ds = configure_for_performance(train_ds, total_train, shuffle=True, cache=cache_data)\n\n    val_ds = tf.data.Dataset.from_tensor_slices((tf.constant(val_img_path),\n                                                 tf.constant(val_img_label)))\n    total_val = 
len(val_img_path)\n    # Use Dataset.map to create a dataset of image, label pairs\n    val_ds = val_ds.map(process_val_info, num_parallel_calls=AUTOTUNE)\n    val_ds = configure_for_performance(val_ds, total_val, cache=False)\n\n    return train_ds, val_ds\n"
  },
  {
    "path": "tensorflow_classification/vision_transformer/vit_model.py",
    "content": "\"\"\"\r\nrefer to:\r\nhttps://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py\r\n\"\"\"\r\nimport tensorflow as tf\r\nfrom tensorflow.keras import Model, layers, initializers\r\nimport numpy as np\r\n\r\n\r\nclass PatchEmbed(layers.Layer):\r\n    \"\"\"\r\n    2D Image to Patch Embedding\r\n    \"\"\"\r\n    def __init__(self, img_size=224, patch_size=16, embed_dim=768):\r\n        super(PatchEmbed, self).__init__()\r\n        self.embed_dim = embed_dim\r\n        self.img_size = (img_size, img_size)\r\n        self.grid_size = (img_size // patch_size, img_size // patch_size)\r\n        self.num_patches = self.grid_size[0] * self.grid_size[1]\r\n\r\n        self.proj = layers.Conv2D(filters=embed_dim, kernel_size=patch_size,\r\n                                  strides=patch_size, padding='SAME',\r\n                                  kernel_initializer=initializers.LecunNormal(),\r\n                                  bias_initializer=initializers.Zeros())\r\n\r\n    def call(self, inputs, **kwargs):\r\n        B, H, W, C = inputs.shape\r\n        assert H == self.img_size[0] and W == self.img_size[1], \\\r\n            f\"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]}).\"\r\n        x = self.proj(inputs)\r\n        # [B, H, W, C] -> [B, H*W, C]\r\n        x = tf.reshape(x, [B, self.num_patches, self.embed_dim])\r\n        return x\r\n\r\n\r\nclass ConcatClassTokenAddPosEmbed(layers.Layer):\r\n    def __init__(self, embed_dim=768, num_patches=196, name=None):\r\n        super(ConcatClassTokenAddPosEmbed, self).__init__(name=name)\r\n        self.embed_dim = embed_dim\r\n        self.num_patches = num_patches\r\n\r\n    def build(self, input_shape):\r\n        self.cls_token = self.add_weight(name=\"cls\",\r\n                                         shape=[1, 1, self.embed_dim],\r\n                                         initializer=initializers.Zeros(),\r\n                 
                        trainable=True,\r\n                                         dtype=tf.float32)\r\n        self.pos_embed = self.add_weight(name=\"pos_embed\",\r\n                                         shape=[1, self.num_patches + 1, self.embed_dim],\r\n                                         initializer=initializers.RandomNormal(stddev=0.02),\r\n                                         trainable=True,\r\n                                         dtype=tf.float32)\r\n\r\n    def call(self, inputs, **kwargs):\r\n        batch_size, _, _ = inputs.shape\r\n\r\n        # [1, 1, 768] -> [B, 1, 768]\r\n        cls_token = tf.broadcast_to(self.cls_token, shape=[batch_size, 1, self.embed_dim])\r\n        x = tf.concat([cls_token, inputs], axis=1)  # [B, 197, 768]\r\n        x = x + self.pos_embed\r\n\r\n        return x\r\n\r\n\r\nclass Attention(layers.Layer):\r\n    k_ini = initializers.GlorotUniform()\r\n    b_ini = initializers.Zeros()\r\n\r\n    def __init__(self,\r\n                 dim,\r\n                 num_heads=8,\r\n                 qkv_bias=False,\r\n                 qk_scale=None,\r\n                 attn_drop_ratio=0.,\r\n                 proj_drop_ratio=0.,\r\n                 name=None):\r\n        super(Attention, self).__init__(name=name)\r\n        self.num_heads = num_heads\r\n        head_dim = dim // num_heads\r\n        self.scale = qk_scale or head_dim ** -0.5\r\n        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias, name=\"qkv\",\r\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\r\n        self.attn_drop = layers.Dropout(attn_drop_ratio)\r\n        self.proj = layers.Dense(dim, name=\"out\",\r\n                                 kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\r\n        self.proj_drop = layers.Dropout(proj_drop_ratio)\r\n\r\n    def call(self, inputs, training=None):\r\n        # [batch_size, num_patches + 1, total_embed_dim]\r\n        B, N, C = 
inputs.shape\r\n\r\n        # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]\r\n        qkv = self.qkv(inputs)\r\n        # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]\r\n        qkv = tf.reshape(qkv, [B, N, 3, self.num_heads, C // self.num_heads])\r\n        # transpose: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]\r\n        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])\r\n        # [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\r\n        q, k, v = qkv[0], qkv[1], qkv[2]\r\n\r\n        # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]\r\n        # multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]\r\n        attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale\r\n        attn = tf.nn.softmax(attn, axis=-1)\r\n        attn = self.attn_drop(attn, training=training)\r\n\r\n        # multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]\r\n        x = tf.matmul(attn, v)\r\n        # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]\r\n        x = tf.transpose(x, [0, 2, 1, 3])\r\n        # reshape: -> [batch_size, num_patches + 1, total_embed_dim]\r\n        x = tf.reshape(x, [B, N, C])\r\n\r\n        x = self.proj(x)\r\n        x = self.proj_drop(x, training=training)\r\n        return x\r\n\r\n\r\nclass MLP(layers.Layer):\r\n    \"\"\"\r\n    MLP as used in Vision Transformer, MLP-Mixer and related networks\r\n    \"\"\"\r\n\r\n    k_ini = initializers.GlorotUniform()\r\n    b_ini = initializers.RandomNormal(stddev=1e-6)\r\n\r\n    def __init__(self, in_features, mlp_ratio=4.0, drop=0., name=None):\r\n        super(MLP, self).__init__(name=name)\r\n        self.fc1 = layers.Dense(int(in_features * mlp_ratio), name=\"Dense_0\",\r\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\r\n        self.act = layers.Activation(\"gelu\")\r\n        self.fc2 = 
layers.Dense(in_features, name=\"Dense_1\",\r\n                                kernel_initializer=self.k_ini, bias_initializer=self.b_ini)\r\n        self.drop = layers.Dropout(drop)\r\n\r\n    def call(self, inputs, training=None):\r\n        x = self.fc1(inputs)\r\n        x = self.act(x)\r\n        x = self.drop(x, training=training)\r\n        x = self.fc2(x)\r\n        x = self.drop(x, training=training)\r\n        return x\r\n\r\n\r\nclass Block(layers.Layer):\r\n    def __init__(self,\r\n                 dim,\r\n                 num_heads=8,\r\n                 qkv_bias=False,\r\n                 qk_scale=None,\r\n                 drop_ratio=0.,\r\n                 attn_drop_ratio=0.,\r\n                 drop_path_ratio=0.,\r\n                 name=None):\r\n        super(Block, self).__init__(name=name)\r\n        self.norm1 = layers.LayerNormalization(epsilon=1e-6, name=\"LayerNorm_0\")\r\n        self.attn = Attention(dim, num_heads=num_heads,\r\n                              qkv_bias=qkv_bias, qk_scale=qk_scale,\r\n                              attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio,\r\n                              name=\"MultiHeadAttention\")\r\n        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here\r\n        self.drop_path = layers.Dropout(rate=drop_path_ratio, noise_shape=(None, 1, 1)) if drop_path_ratio > 0. 
\\\r\n            else layers.Activation(\"linear\")\r\n        self.norm2 = layers.LayerNormalization(epsilon=1e-6, name=\"LayerNorm_1\")\r\n        self.mlp = MLP(dim, drop=drop_ratio, name=\"MlpBlock\")\r\n\r\n    def call(self, inputs, training=None):\r\n        x = inputs + self.drop_path(self.attn(self.norm1(inputs)), training=training)\r\n        x = x + self.drop_path(self.mlp(self.norm2(x)), training=training)\r\n        return x\r\n\r\n\r\nclass VisionTransformer(Model):\r\n    def __init__(self, img_size=224, patch_size=16, embed_dim=768,\r\n                 depth=12, num_heads=12, qkv_bias=True, qk_scale=None,\r\n                 drop_ratio=0., attn_drop_ratio=0., drop_path_ratio=0.,\r\n                 representation_size=None, num_classes=1000, name=\"ViT-B/16\"):\r\n        super(VisionTransformer, self).__init__(name=name)\r\n        self.num_classes = num_classes\r\n        self.embed_dim = embed_dim\r\n        self.depth = depth\r\n        self.qkv_bias = qkv_bias\r\n\r\n        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, embed_dim=embed_dim)\r\n        num_patches = self.patch_embed.num_patches\r\n        self.cls_token_pos_embed = ConcatClassTokenAddPosEmbed(embed_dim=embed_dim,\r\n                                                               num_patches=num_patches,\r\n                                                               name=\"cls_pos\")\r\n\r\n        self.pos_drop = layers.Dropout(drop_ratio)\r\n\r\n        dpr = np.linspace(0., drop_path_ratio, depth)  # stochastic depth decay rule\r\n        self.blocks = [Block(dim=embed_dim, num_heads=num_heads, qkv_bias=qkv_bias,\r\n                             qk_scale=qk_scale, drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio,\r\n                             drop_path_ratio=dpr[i], name=\"encoderblock_{}\".format(i))\r\n                       for i in range(depth)]\r\n\r\n        self.norm = layers.LayerNormalization(epsilon=1e-6, 
name=\"encoder_norm\")\r\n\r\n        if representation_size:\r\n            self.has_logits = True\r\n            self.pre_logits = layers.Dense(representation_size, activation=\"tanh\", name=\"pre_logits\")\r\n        else:\r\n            self.has_logits = False\r\n            self.pre_logits = layers.Activation(\"linear\")\r\n\r\n        self.head = layers.Dense(num_classes, name=\"head\", kernel_initializer=initializers.Zeros())\r\n\r\n    def call(self, inputs, training=None):\r\n        # [B, H, W, C] -> [B, num_patches, embed_dim]\r\n        x = self.patch_embed(inputs)  # [B, 196, 768]\r\n        x = self.cls_token_pos_embed(x)  # [B, 197, 768]\r\n        x = self.pos_drop(x, training=training)\r\n\r\n        for block in self.blocks:\r\n            x = block(x, training=training)\r\n\r\n        x = self.norm(x)\r\n        x = self.pre_logits(x[:, 0])\r\n        x = self.head(x)\r\n\r\n        return x\r\n\r\n\r\ndef vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\r\n    \"\"\"\r\n    ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).\r\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\r\n    \"\"\"\r\n    model = VisionTransformer(img_size=224,\r\n                              patch_size=16,\r\n                              embed_dim=768,\r\n                              depth=12,\r\n                              num_heads=12,\r\n                              representation_size=768 if has_logits else None,\r\n                              num_classes=num_classes,\r\n                              name=\"ViT-B_16\")\r\n    return model\r\n\r\n\r\ndef vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\r\n    \"\"\"\r\n    ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).\r\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\r\n    \"\"\"\r\n    
model = VisionTransformer(img_size=224,\r\n                              patch_size=32,\r\n                              embed_dim=768,\r\n                              depth=12,\r\n                              num_heads=12,\r\n                              representation_size=768 if has_logits else None,\r\n                              num_classes=num_classes,\r\n                              name=\"ViT-B_32\")\r\n    return model\r\n\r\n\r\ndef vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):\r\n    \"\"\"\r\n    ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).\r\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\r\n    \"\"\"\r\n    model = VisionTransformer(img_size=224,\r\n                              patch_size=16,\r\n                              embed_dim=1024,\r\n                              depth=24,\r\n                              num_heads=16,\r\n                              representation_size=1024 if has_logits else None,\r\n                              num_classes=num_classes,\r\n                              name=\"ViT-L_16\")\r\n    return model\r\n\r\n\r\ndef vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):\r\n    \"\"\"\r\n    ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).\r\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\r\n    \"\"\"\r\n    model = VisionTransformer(img_size=224,\r\n                              patch_size=32,\r\n                              embed_dim=1024,\r\n                              depth=24,\r\n                              num_heads=16,\r\n                              representation_size=1024 if has_logits else None,\r\n                              num_classes=num_classes,\r\n                              name=\"ViT-L_32\")\r\n    return model\r\n\r\n\r\ndef 
vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):\r\n    \"\"\"\r\n    ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).\r\n    ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.\r\n    \"\"\"\r\n    model = VisionTransformer(img_size=224,\r\n                              patch_size=14,\r\n                              embed_dim=1280,\r\n                              depth=32,\r\n                              num_heads=16,\r\n                              representation_size=1280 if has_logits else None,\r\n                              num_classes=num_classes,\r\n                              name=\"ViT-H_14\")\r\n    return model\r\n"
  }
]