Repository: Zzh-tju/CIoU
Branch: master
Commit: a9f589f28053
Files: 96
Total size: 19.5 MB
Directory structure:
gitextract_fc4uicbd/
├── .gitignore
├── LICENSE
├── README.md
├── README_zh-CN.md
├── backbone.py
├── data/
│ ├── __init__.py
│ ├── coco.py
│ ├── config.py
│ ├── grid.npy
│ └── scripts/
│ ├── COCO.sh
│ ├── COCO_test.sh
│ └── mix_sets.py
├── environment.yml
├── eval.py
├── external/
│ └── DCNv2/
│ ├── LICENSE
│ ├── README.md
│ ├── dcn_v2.py
│ ├── setup.py
│ ├── src/
│ │ ├── cpu/
│ │ │ ├── dcn_v2_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda/
│ │ │ ├── dcn_v2_cuda.cu
│ │ │ ├── dcn_v2_im2col_cuda.cu
│ │ │ ├── dcn_v2_im2col_cuda.h
│ │ │ ├── dcn_v2_psroi_pooling_cuda.cu
│ │ │ └── vision.h
│ │ ├── dcn_v2.h
│ │ └── vision.cpp
│ └── test.py
├── layers/
│ ├── __init__.py
│ ├── box_utils.py
│ ├── functions/
│ │ ├── __init__.py
│ │ └── detection.py
│ ├── interpolate.py
│ ├── modules/
│ │ ├── __init__.py
│ │ └── multibox_loss.py
│ └── output_utils.py
├── run_coco_eval.py
├── scripts/
│ ├── augment_bbox.py
│ ├── bbox_recall.py
│ ├── cluster_bbox_sizes.py
│ ├── compute_masks.py
│ ├── convert_darknet.py
│ ├── convert_sbd.py
│ ├── eval.sh
│ ├── make_grid.py
│ ├── optimize_bboxes.py
│ ├── parse_eval.py
│ ├── plot_loss.py
│ ├── resume.sh
│ ├── save_bboxes.py
│ ├── train.sh
│ └── unpack_statedict.py
├── train.py
├── utils/
│ ├── __init__.py
│ ├── augmentations.py
│ ├── cython_nms.pyx
│ ├── functions.py
│ ├── logger.py
│ ├── nvinfo.py
│ └── timer.py
├── web/
│ ├── css/
│ │ ├── index.css
│ │ ├── list.css
│ │ ├── toggle.css
│ │ └── viewer.css
│ ├── dets/
│ │ ├── ssd300.json
│ │ ├── ssd550.json
│ │ ├── ssd550_resnet101.json
│ │ ├── test.json
│ │ ├── yolact_base.json
│ │ ├── yolact_darknet53.json
│ │ ├── yolact_im700.json
│ │ ├── yolact_resnet101_conv4.json
│ │ ├── yolact_resnet101_maskrcnn.json
│ │ ├── yolact_resnet101_maskrcnn_1.json
│ │ ├── yolact_resnet50.json
│ │ ├── yrm12.json
│ │ ├── yrm13.json
│ │ ├── yrm16_2.json
│ │ ├── yrm18.json
│ │ ├── yrm19.json
│ │ ├── yrm21.json
│ │ ├── yrm25_b.json
│ │ ├── yrm28_2_perfect.json
│ │ ├── yrm35_crop.json
│ │ └── yrm35_retina.json
│ ├── index.html
│ ├── iou.html
│ ├── scripts/
│ │ ├── index.js
│ │ ├── iou.js
│ │ ├── jquery.js
│ │ ├── js.cookie.js
│ │ ├── utils.js
│ │ └── viewer.js
│ ├── server.py
│ └── viewer.html
└── yolact.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: README.md
================================================
<img src="CIoU.png" width="800px"/>
### English | [简体中文](README_zh-CN.md)
## Complete-IoU Loss and Cluster-NMS for Improving Object Detection and Instance Segmentation.
Our paper has been accepted by **IEEE Transactions on Cybernetics (TCYB)**.
### This repo is based on YOLACT++.
This is the code for our papers:
- [Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression](https://arxiv.org/abs/1911.08287)
- [Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation](https://arxiv.org/abs/2005.03572)
```
@Inproceedings{zheng2020diou,
author = {Zheng, Zhaohui and Wang, Ping and Liu, Wei and Li, Jinze and Ye, Rongguang and Ren, Dongwei},
title = {Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression},
booktitle = {The AAAI Conference on Artificial Intelligence (AAAI)},
pages = {12993--13000},
year = {2020}
}
@Article{zheng2021ciou,
author = {Zheng, Zhaohui and Wang, Ping and Ren, Dongwei and Liu, Wei and Ye, Rongguang and Hu, Qinghua and Zuo, Wangmeng},
title = {Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation},
journal = {IEEE Transactions on cybernetics},
volume = {52},
number = {8},
pages = {8574--8586},
year = {2021},
publisher = {IEEE}
}
```
## Description of Cluster-NMS and Its Usage
An example diagram of our Cluster-NMS, where X denotes the IoU matrix computed by `X=jaccard(boxes,boxes).triu_(diagonal=1) > nms_thresh` after the boxes have been sorted by score in descending order. (0/1 entries are used here for visualization.)
<img src="cluster-nms01.png" width="1150px"/>
<img src="cluster-nms02.png" width="1150px"/>
The inputs of NMS are `boxes` with shape [n,4] and `scores` with shape [80,n] (taking COCO as an example).
There are two ways to run NMS. In the first, every class keeps the same number of boxes: we select the top k=200 detections for every class, so `boxes` becomes [80,200,4]. We then run Cluster-NMS, keep the boxes with `scores>0.01`, and finally return the top 100 boxes across all classes.
In the second, different classes keep different numbers of boxes. We first apply a score threshold (e.g. 0.01) to filter out most low-scoring detections, so different classes may be left with different numbers of boxes. (Note that the same box may appear more than once, because its scores for several classes can exceed the 0.01 threshold.) We then put all the boxes together, sort them by score in descending order, and add a per-class offset to the `boxes` according to their class labels (using `torch.arange(0,80)`). Since the coordinates (x1,y1,x2,y2) of all the boxes lie in (0,1), a box of class 61 is shifted onto the interval (60,61) after the offset, so the IoU between boxes of different classes becomes 0 and they are treated as different clusters. Finally, run Cluster-NMS and return the top 100 boxes across all classes. (For this method, please refer to our other repository https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/detection/detection.py)
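For illustration, here is a minimal PyTorch sketch of the core Cluster-NMS iteration described above (single-class case). It is not the repository's implementation (see [layers/functions/detection.py](layers/functions/detection.py) for that), and the helper names (`box_iou`, `cluster_nms`) and default thresholds are placeholders.
```python
import torch

def box_iou(a, b):
    # a: [n,4], b: [m,4] boxes in (x1,y1,x2,y2) format; returns the [n,m] pairwise IoU matrix.
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])   # top-left corners of the intersections
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])   # bottom-right corners of the intersections
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-7)

def cluster_nms(boxes, scores, iou_threshold=0.5, top_k=200):
    # boxes: [n,4] with coordinates in (0,1), scores: [n].
    scores, order = scores.sort(descending=True)
    order = order[:top_k]
    boxes = boxes[order]
    # Binarized upper-triangular IoU matrix, as in the diagram above.
    x = (box_iou(boxes, boxes).triu_(diagonal=1) > iou_threshold).float()
    b = x
    for _ in range(boxes.size(0)):
        a = b
        keep = (a.max(dim=0)[0] == 0)       # a box survives if no kept box suppresses it
        b = keep[:, None].float() * x       # suppressed boxes may no longer suppress others
        if torch.equal(a, b):               # the matrix stops changing after a few iterations
            break
    return order[keep]                      # indices of the kept boxes in the original tensors
```
For the second approach above, one would simply add each box's class label to its coordinates before calling `cluster_nms` (e.g. `boxes = boxes + classes[:, None].float()`), so that boxes of different classes can never overlap.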
## Getting Started
### 1) Newly released! CIoU and Cluster-NMS
1. YOLACT (See [YOLACT](https://github.com/Zzh-tju/CIoU#YOLACT))
2. YOLOv3-pytorch [https://github.com/Zzh-tju/ultralytics-YOLOv3-Cluster-NMS](https://github.com/Zzh-tju/ultralytics-YOLOv3-Cluster-NMS)
3. YOLOv5 (Support batch mode Cluster-NMS. It will speed up NMS when turning on test-time augmentation like multi-scale testing.) [https://github.com/Zzh-tju/yolov5](https://github.com/Zzh-tju/yolov5)
4. SSD-pytorch [https://github.com/Zzh-tju/DIoU-SSD-pytorch](https://github.com/Zzh-tju/DIoU-SSD-pytorch)
### 2) DIoU and CIoU losses into Detection Algorithms
DIoU and CIoU losses are incorporated into state-of-the-art detection algorithms, including YOLO v3, SSD and Faster R-CNN.
The implementation details and comparisons can be found at the following links.
1. YOLO v3 [https://github.com/Zzh-tju/DIoU-darknet](https://github.com/Zzh-tju/DIoU-darknet)
2. SSD [https://github.com/Zzh-tju/DIoU-SSD-pytorch](https://github.com/Zzh-tju/DIoU-SSD-pytorch)
3. Faster R-CNN [https://github.com/Zzh-tju/DIoU-pytorch-detectron](https://github.com/Zzh-tju/DIoU-pytorch-detectron)
4. Simulation Experiment [https://github.com/Zzh-tju/DIoU](https://github.com/Zzh-tju/DIoU)
# YOLACT
### Codes location and options
Please take a look at the `ciou` function in [layers/modules/multibox_loss.py](layers/modules/multibox_loss.py) for our CIoU loss implementation in PyTorch.
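As background, CIoU is `1 - IoU` plus a normalized center-distance penalty and an aspect-ratio consistency term. The sketch below follows the formula from the paper and is only illustrative; it is not the repository's `ciou` function, and the function name and epsilon value are placeholders.
```python
import math
import torch

def ciou_loss(pred, target, eps=1e-7):
    # pred, target: [n,4] boxes in (x1,y1,x2,y2) format.
    lt = torch.max(pred[:, :2], target[:, :2])
    rb = torch.min(pred[:, 2:], target[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_p = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_t = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
    iou = inter / (area_p + area_t - inter + eps)

    # Squared center distance, normalized by the squared diagonal of the
    # smallest enclosing box (the DIoU penalty term).
    c_lt = torch.min(pred[:, :2], target[:, :2])
    c_rb = torch.max(pred[:, 2:], target[:, 2:])
    c_diag = ((c_rb - c_lt) ** 2).sum(dim=1) + eps
    center_dist = (((pred[:, :2] + pred[:, 2:]) / 2 - (target[:, :2] + target[:, 2:]) / 2) ** 2).sum(dim=1)

    # Aspect-ratio consistency term v and its trade-off weight alpha.
    w_p = (pred[:, 2] - pred[:, 0]).clamp(min=eps)
    h_p = (pred[:, 3] - pred[:, 1]).clamp(min=eps)
    w_t = (target[:, 2] - target[:, 0]).clamp(min=eps)
    h_t = (target[:, 3] - target[:, 1]).clamp(min=eps)
    v = (4 / math.pi ** 2) * (torch.atan(w_t / h_t) - torch.atan(w_p / h_p)) ** 2
    with torch.no_grad():                   # alpha is treated as a constant during backprop
        alpha = v / (1 - iou + v + eps)

    return 1 - iou + center_dist / c_diag + alpha * v
```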
Currently, NMS supports two modes (see [eval.py](eval.py)):
1. Cross-class mode, which ignores classes. (`cross_class_nms=True`, faster than per-class mode but with a slight performance drop.)
2. Per-class mode. (`cross_class_nms=False`)
Currently, NMS supports `fast_nms`, `cluster_nms`, `cluster_diounms`, `spm`, `spm_dist`, `spm_dist_weighted`.
See [layers/functions/detection.py](layers/functions/detection.py) for our Cluster-NMS implementation in PyTorch.
# Installation
In order to use YOLACT++, make sure you compile the DCNv2 code.
- Clone this repository and enter it:
```Shell
git clone https://github.com/Zzh-tju/CIoU.git
cd yolact
```
- Set up the environment using one of the following methods:
- Using [Anaconda](https://www.anaconda.com/distribution/)
- Run `conda env create -f environment.yml`
- Manually with pip
- Set up a Python3 environment (e.g., using virtualenv).
- Install [Pytorch](http://pytorch.org/) 1.0.1 (or higher) and TorchVision.
- Install some other packages:
```Shell
# Cython needs to be installed before pycocotools
pip install cython
pip install opencv-python pillow pycocotools matplotlib
```
- If you'd like to train YOLACT, download the COCO dataset and the 2014/2017 annotations. Note that this script will take a while and dump 21 GB of files into `./data/coco`.
```Shell
sh data/scripts/COCO.sh
```
- If you'd like to evaluate YOLACT on `test-dev`, download `test-dev` with this script.
```Shell
sh data/scripts/COCO_test.sh
```
- If you want to use YOLACT++, compile deformable convolutional layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_1.0)).
Make sure you have the latest CUDA toolkit installed from [NVidia's Website](https://developer.nvidia.com/cuda-toolkit).
```Shell
cd external/DCNv2
python setup.py build develop
```
# Evaluation
Here are our YOLACT models (released on May 5th, 2020) along with their FPS on a GTX 1080 Ti and mAP on `coco 2017 val`:
The training was carried out on two GTX 1080 Ti GPUs with the command:
`python train.py --config=yolact_base_config --batch_size=8`
| Image Size | Backbone | Loss | NMS | FPS | box AP | mask AP | Weights |
|:----:|:-------------:|:-------:|:----:|:----:|:----:|:----:|----------------------------------------------------------------------------------------------------------------------|
| 550 | Resnet101-FPN | SL1 | Fast NMS | 30.6 | 31.5 | 29.1 |[SL1.pth](https://share.weiyun.com/5N840Hm) |
| 550 | Resnet101-FPN | CIoU | Fast NMS | 30.6 | 32.1 | 29.6 | [CIoU.pth](https://share.weiyun.com/5EtJ4dJ) |
To evaluate the model, put the corresponding weights file in the `./weights` directory and run one of the following commands. The name of each config is everything before the numbers in the file name (e.g., `yolact_base` for `yolact_base_54_800000.pth`).
## Quantitative Results on COCO
```
# Quantitatively evaluate a trained model on the entire validation set. Make sure you have COCO downloaded as above.
# Output a COCOEval json to submit to the website or to use the run_coco_eval.py script.
# This command will create './results/bbox_detections.json' and './results/mask_detections.json' for detection and instance segmentation respectively.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --output_coco_json
# You can run COCOEval on the files created in the previous command. The performance should match my implementation in eval.py.
python run_coco_eval.py
# To output a coco json file for test-dev, make sure you have test-dev downloaded from above and go
python eval.py --trained_model=weights/yolact_base_54_800000.pth --output_coco_json --dataset=coco2017_testdev_dataset
```
## Qualitative Results on COCO
```
# Display qualitative results on COCO. From here on I'll use a confidence threshold of 0.15.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --display
```
## Cluster-NMS Using Benchmark on COCO
```
python eval.py --trained_model=weights/yolact_base_54_800000.pth --benchmark
```
#### Hardware
- 1 GTX 1080 Ti
- Intel(R) Core(TM) i7-6850K CPU @ 3.60GHz
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:------------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | CIoU | Fast NMS |**30.6**| 32.1 | 33.9 | 43.0 | 29.6 | 30.9 | 40.3 |
| 550 | Resnet101-FPN | CIoU | Original NMS | 11.5 | 32.5 | 34.1 | 45.1 | 29.7 | 31.0 | 41.7 |
| 550 | Resnet101-FPN | CIoU | Cluster-NMS | 28.8 | 32.5 | 34.1 | 45.2 | 29.7 | 31.0 | 41.7 |
| 550 | Resnet101-FPN | CIoU | SPM Cluster-NMS | 28.6 | 33.1 | 35.2 | 48.8 |**30.3**|**31.7**| 43.6 |
| 550 | Resnet101-FPN | CIoU | SPM + Distance Cluster-NMS | 27.1 | 33.2 | 35.2 |**49.2**| 30.2 |**31.7**|**43.8**|
| 550 | Resnet101-FPN | CIoU | SPM + Distance + Weighted Cluster-NMS | 26.5 |**33.4**|**35.5**| 49.1 |**30.3**| 31.6 |**43.8**|
The following table was evaluated using the original pretrained YOLACT weights. ([yolact_resnet50_54_800000.pth](https://ucdavis365-my.sharepoint.com/:u:/g/personal/yongjaelee_ucdavis_edu/EUVpxoSXaqNIlssoLKOEoCcB1m0RpzGq_Khp5n1VX3zcUw))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet50-FPN | SL1 | Fast NMS |**41.6**| 30.2 | 31.9 | 42.0 | 28.0 | 29.1 | 39.4 |
| 550 | Resnet50-FPN | SL1 | Original NMS | 12.8 | 30.7 | 32.0 | 44.1 | 28.1 | 29.2 | 40.7 |
| 550 | Resnet50-FPN | SL1 | Cluster-NMS | 38.2 | 30.7 | 32.0 | 44.1 | 28.1 | 29.2 | 40.7 |
| 550 | Resnet50-FPN | SL1 | SPM Cluster-NMS | 37.7 | 31.3 | 33.2 | 48.0 |**28.8**|**29.9**| 42.8 |
| 550 | Resnet50-FPN | SL1 | SPM + Distance Cluster-NMS | 35.2 | 31.3 | 33.3 | 48.2 | 28.7 |**29.9**| 42.9 |
| 550 | Resnet50-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 34.2 |**31.8**|**33.9**|**48.3**|**28.8**|**29.9**|**43.0**|
The following table was evaluated using the original pretrained YOLACT weights. ([yolact_base_54_800000.pth](https://drive.google.com/file/d/1UYy3dMapbH1BnmtZU4WH1zbYgOzzHHf_/view?usp=sharing))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | SL1 | Fast NMS |**30.6**| 32.5 | 34.6 | 43.9 | 29.8 | 31.3 | 40.8 |
| 550 | Resnet101-FPN | SL1 | Original NMS | 11.9 | 32.9 | 34.8 | 45.8 | 29.9 | 31.4 | 42.1 |
| 550 | Resnet101-FPN | SL1 | Cluster-NMS | 29.2 | 32.9 | 34.8 | 45.9 | 29.9 | 31.4 | 42.1 |
| 550 | Resnet101-FPN | SL1 | SPM Cluster-NMS | 28.8 | 33.5 | 35.9 | 49.7 |**30.5**|**32.1**| 44.1 |
| 550 | Resnet101-FPN | SL1 | SPM + Distance Cluster-NMS | 27.5 | 33.5 | 35.9 |**50.2**| 30.4 | 32.0 |**44.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 26.7 |**34.0**|**36.6**| 49.9 |**30.5**| 32.0 |**44.3**|
The following table was evaluated using the original pretrained YOLACT++ weights. ([yolact_plus_base_54_800000.pth](https://ucdavis365-my.sharepoint.com/:u:/g/personal/yongjaelee_ucdavis_edu/EVQ62sF0SrJPrl_68onyHF8BpG7c05A8PavV4a849sZgEA))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | SL1 | Fast NMS |**25.1**| 35.8 | 38.7 | 45.5 | 34.4 | 36.8 | 42.6 |
| 550 | Resnet101-FPN | SL1 | Original NMS | 10.9 | 36.4 | 39.1 | 48.0 | 34.7 | 37.1 | 44.1 |
| 550 | Resnet101-FPN | SL1 | Cluster-NMS | 23.7 | 36.4 | 39.1 | 48.0 | 34.7 | 37.1 | 44.1 |
| 550 | Resnet101-FPN | SL1 | SPM Cluster-NMS | 23.2 | 36.9 | 40.1 | 52.8 |**35.0**| 37.5 |**46.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance Cluster-NMS | 22.0 | 36.9 | 40.2 |**53.0**| 34.9 | 37.5 |**46.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 21.7 |**37.4**|**40.6**| 52.5 |**35.0**|**37.6**|**46.3**|
#### Note:
- Things we did that do not appear in the paper: SPM + Distance + Weighted Cluster-NMS. Here the weighted average of box coordinates is only performed for `IoU > 0.8`. We found that `IoU > 0.5` does not work well for YOLACT, while `IoU > 0.9` gives almost the same result as `SPM + Distance Cluster-NMS`. (Refer to [CAD](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8265304) for the details of Weighted-NMS.)
- The Original NMS implemented in YOLACT is faster than ours because it first applies a score threshold (0.05) to obtain the set of candidate boxes, which makes the subsequent NMS faster (taking YOLACT ResNet101-FPN as an example, 22~23 FPS with a slight performance drop). In order to get the same result as our Cluster-NMS, we modified the Original NMS procedure.
- Note that Torchvision NMS is the fastest, owing to its CUDA implementation and engineering optimizations (such as using only the upper-triangular IoU matrix). However, our Cluster-NMS requires fewer iterations and can also be further accelerated by adopting such engineering tricks.
- Currently, Torchvision NMS uses IoU as its criterion, not DIoU. If we directly replace IoU with DIoU in Original NMS, it costs much more time due to the sequential operation. Cluster-DIoU-NMS significantly speeds up DIoU-NMS and obtains exactly the same result.
- Torchvision NMS is available in Torchvision>=0.3, whereas our Cluster-NMS can be applied to projects that use a lower version of Torchvision, or to other deep learning frameworks, as long as matrix operations are available. **No extra imports, no compilation, fewer iterations, fully GPU-accelerated and better performance**.
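To make the "SPM" rows in the tables above more concrete: instead of hard suppression, the score penalty mechanism multiplicatively decays the scores of boxes that overlap the kept ones. The sketch below reuses `box_iou` from the Cluster-NMS example above and uses a Soft-NMS-style Gaussian decay as an assumed stand-in for the penalty; the exact penalty function and thresholds used in this repo are in [layers/functions/detection.py](layers/functions/detection.py).
```python
import torch

def cluster_nms_spm(boxes, scores, iou_threshold=0.5, sigma=0.5):
    # boxes: [n,4], scores: [n]; reuses box_iou() from the Cluster-NMS sketch above.
    # The Gaussian decay is an illustrative assumption, not this repo's exact choice.
    scores, order = scores.sort(descending=True)
    boxes = boxes[order]
    iou = box_iou(boxes, boxes).triu_(diagonal=1)   # raw (not binarized) IoU matrix
    x = (iou > iou_threshold).float()
    b = x
    for _ in range(boxes.size(0)):                  # same iteration as plain Cluster-NMS
        a = b
        keep = (a.max(dim=0)[0] == 0).float()
        b = keep[:, None] * x
        if torch.equal(a, b):
            break
    # Decay each box's score by its overlap with every box that is still kept.
    penalty = torch.exp(-((keep[:, None] * iou) ** 2) / sigma).prod(dim=0)
    return scores * penalty, order
```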
## Images
```Shell
# Display qualitative results on the specified image.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=my_image.png
# Process an image and save it to another file.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=input_image.png:output_image.png
# Process a whole folder of images.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --images=path/to/input/folder:path/to/output/folder
```
## Video
```Shell
# Display a video in real-time. "--video_multiframe" will process that many frames at once for improved performance.
# If you want, use "--display_fps" to draw the FPS directly on the frame.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=my_video.mp4
# Display a webcam feed in real-time. If you have multiple webcams pass the index of the webcam you want instead of 0.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=0
# Process a video and save it to another file. This uses the same pipeline as the ones above now, so it's fast!
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=input_video.mp4:output_video.mp4
```
As you can tell, `eval.py` can do a ton of stuff. Run the `--help` command to see everything it can do.
```Shell
python eval.py --help
```
# Training
By default, we train on COCO. Make sure to download the entire dataset using the commands above.
- To train, grab an imagenet-pretrained model and put it in `./weights`.
- For Resnet101, download `resnet101_reducedfc.pth` from [here](https://drive.google.com/file/d/1tvqFPd4bJtakOlmn-uIA492g2qurRChj/view?usp=sharing).
- For Resnet50, download `resnet50-19c8e357.pth` from [here](https://drive.google.com/file/d/1Jy3yCdbatgXa5YYIdTCRrSV0S9V5g1rn/view?usp=sharing).
- For Darknet53, download `darknet53.pth` from [here](https://drive.google.com/file/d/17Y431j4sagFpSReuPNoFcj9h7azDTZFf/view?usp=sharing).
- Run one of the training commands below.
- Note that you can press ctrl+c while training and it will save an `*_interrupt.pth` file at the current iteration.
- All weights are saved in the `./weights` directory by default with the file name `<config>_<epoch>_<iter>.pth`.
```Shell
# Trains using the base config with a batch size of 8 (the default).
python train.py --config=yolact_base_config
# Trains yolact_base_config with a batch_size of 5. For the 550px models, 1 batch takes up around 1.5 gigs of VRAM, so specify accordingly.
python train.py --config=yolact_base_config --batch_size=5
# Resume training yolact_base with a specific weight file and start from the iteration specified in the weight file's name.
python train.py --config=yolact_base_config --resume=weights/yolact_base_10_32100.pth --start_iter=-1
# Use the help option to see a description of all available command line arguments
python train.py --help
```
## Multi-GPU Support
YOLACT now supports multiple GPUs seamlessly during training:
- Before running any of the scripts, run: `export CUDA_VISIBLE_DEVICES=[gpus]`
- Where you should replace [gpus] with a comma separated list of the index of each GPU you want to use (e.g., 0,1,2,3).
- You should still do this if only using 1 GPU.
- You can check the indices of your GPUs with `nvidia-smi`.
- Then, simply set the batch size to `8*num_gpus` with the training commands above. The training script will automatically scale the hyperparameters to the right values.
- If you have memory to spare you can increase the batch size further, but keep it a multiple of the number of GPUs you're using.
- If you want to allocate a specific number of images to each GPU, use `--batch_alloc=[alloc]`, where [alloc] is a comma-separated list containing the number of images on each GPU. This must sum to `batch_size`. See the example below.
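For example, a hypothetical run on four GPUs (the indices are illustrative) might look like this:
```Shell
# Make GPUs 0-3 visible to the training script.
export CUDA_VISIBLE_DEVICES=0,1,2,3
# Scale the batch size to 8 * 4 GPUs = 32.
python train.py --config=yolact_base_config --batch_size=32
# Or pin 8 images to each GPU explicitly (the allocation must sum to batch_size).
python train.py --config=yolact_base_config --batch_size=32 --batch_alloc=8,8,8,8
```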
## Acknowledgments
Thank you to [Daniel Bolya](https://github.com/dbolya/) for [YOLACT & YOLACT++](https://github.com/dbolya/yolact), an excellent work on real-time instance segmentation.
================================================
FILE: README_zh-CN.md
================================================
<img src="CIoU.png" width="800px"/>
### [English](README.md) | Simplified Chinese
## Complete-IoU Loss and Cluster-NMS for Improving Object Detection and Instance Segmentation.
Our paper has been accepted by **IEEE Transactions on Cybernetics (TCYB)**.
### This code is based on YOLACT++.
This is the code for our papers:
- [Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression](https://arxiv.org/abs/1911.08287)
- [Enhancing Geometric Factors into Model Learning and Inference for Object Detection and Instance Segmentation](https://arxiv.org/abs/2005.03572)
```
@Inproceedings{zheng2020diou,
author = {Zheng, Zhaohui and Wang, Ping and Liu, Wei and Li, Jinze and Ye, Rongguang and Ren, Dongwei},
title = {Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression},
booktitle = {The AAAI Conference on Artificial Intelligence (AAAI)},
year = {2020},
}
@Article{zheng2021ciou,
author = {Zheng, Zhaohui and Wang, Ping and Ren, Dongwei and Liu, Wei and Ye, Rongguang and Hu, Qinghua and Zuo, Wangmeng},
title = {Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation},
journal = {IEEE Transactions on Cybernetics},
year = {2021},
}
```
## Description and Usage of Cluster-NMS
Below is a schematic of the Cluster-NMS algorithm, where X denotes the IoU matrix computed by `X=jaccard(boxes,boxes).triu_(diagonal=1) > nms_thresh`, with `boxes` sorted beforehand in descending order of classification score. A minimal sketch of the main loop is given after the figures below.
<img src="cluster-nms01.png" width="1150px"/>
<img src="cluster-nms02.png" width="1150px"/>
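For reference, here is a minimal PyTorch sketch of the main loop shown in the figures. It loosely follows the Cluster-NMS functions in [layers/functions/detection.py](layers/functions/detection.py) but is not the repo's exact code: `torchvision.ops.box_iou` stands in for the repo's `jaccard`, and the function name `cluster_nms_sketch` is made up.
```python
import torch
from torchvision.ops import box_iou  # stands in for the repo's jaccard()

def cluster_nms_sketch(boxes, scores, iou_threshold=0.5):
    """Cluster-NMS main loop. boxes: [n, 4] in (x1, y1, x2, y2); scores: [n]."""
    scores, order = scores.sort(descending=True)        # sort by score, highest first
    boxes = boxes[order]
    iou = box_iou(boxes, boxes).triu_(diagonal=1)        # X: upper-triangular IoU matrix
    keep = torch.ones_like(scores)
    C = iou
    for _ in range(boxes.size(0)):                       # usually converges in a few iterations
        A = C
        max_a, _ = torch.max(A, dim=0)                   # largest overlap with a kept, higher-scored box
        keep = (max_a <= iou_threshold).float()          # b: 1 if the box is currently kept
        C = iou * keep.unsqueeze(1).expand_as(A)         # zero out the rows of suppressed boxes
        if A.equal(C):                                   # fixed point reached
            break
    return order[keep > 0]                               # indices of the kept boxes
```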
The input to NMS is `boxes` of shape [n,4] and classification `scores` of shape [80,n] (taking COCO as an example).
There are two ways to run NMS, each with its own characteristics.
The first keeps the same number of boxes for every class. We first select the top k=200 boxes per class according to `scores`, so `boxes` becomes [80,200,4]. We then run the Cluster-NMS body, keep only boxes with `scores>0.01`, and finally return the 100 highest-scoring boxes.
The second allows different classes to keep different numbers of boxes. We first filter out most low-scoring detections with a `scores` threshold (e.g., 0.01). After this step different classes may retain different numbers of boxes, and note that the same box may appear several times, since its `score` can exceed the 0.01 threshold for more than one class. All remaining boxes are then gathered together and sorted by `score` in descending order.
Next we add offsets to the boxes using `torch.arange(0,80)`. This guarantees that boxes of different classes no longer intersect, so their IoU is exactly 0: because the (x1,y1,x2,y2) coordinates all lie in (0,1), adding each box's class label to its coordinates forces boxes of different classes into different clusters. For example, for a box of the 61st class, whose coordinates originally lie in (0,1), all coordinates fall in (60,61) after the class-label offset.
Finally, we run the Cluster-NMS body and return the top 100 boxes (a rough sketch of this second way is given below). For this variant of Cluster-NMS, see our other repository [SSD](https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/detection/detection.py)
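A rough sketch of this second way (score filtering, class-label offsets, then one class-agnostic pass), assuming the `cluster_nms_sketch` helper above; the function and variable names are illustrative and this is not the repo's actual code:
```python
import torch

def batched_cluster_nms(boxes, scores, score_thresh=0.01, iou_threshold=0.5, top=100):
    """boxes: [n, 4] with coordinates in (0, 1); scores: [80, n] per-class scores."""
    idx = (scores > score_thresh).nonzero()              # a box may appear once per qualifying class
    cls, box = idx[:, 0], idx[:, 1]
    conf = scores[cls, box]
    conf, order = conf.sort(descending=True)             # gather everything and sort by score
    cls, box = cls[order], box[order]
    # Offset each box by its class id so boxes of different classes can never overlap (IoU = 0).
    shifted = boxes[box] + cls.to(boxes.dtype).unsqueeze(1)
    keep = cluster_nms_sketch(shifted, conf, iou_threshold)[:top]
    return boxes[box[keep]], cls[keep], conf[keep]
```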
## Guides
### 1) CIoU & Cluster-NMS
1. YOLACT (See [YOLACT](https://github.com/Zzh-tju/CIoU#YOLACT))
2. YOLOv3-pytorch [https://github.com/Zzh-tju/ultralytics-YOLOv3-Cluster-NMS](https://github.com/Zzh-tju/ultralytics-YOLOv3-Cluster-NMS)
3. YOLOv5 (Cluster-NMS with batched-mode support; when test-time augmentation such as multi-scale testing is used, NMS is greatly accelerated.) [https://github.com/Zzh-tju/yolov5](https://github.com/Zzh-tju/yolov5)
4. SSD-pytorch [https://github.com/Zzh-tju/DIoU-SSD-pytorch](https://github.com/Zzh-tju/DIoU-SSD-pytorch)
### 2) DIoU & CIoU incorporated into detectors
1. YOLO v3 [https://github.com/Zzh-tju/DIoU-darknet](https://github.com/Zzh-tju/DIoU-darknet)
2. SSD [https://github.com/Zzh-tju/DIoU-SSD-pytorch](https://github.com/Zzh-tju/DIoU-SSD-pytorch)
3. Faster R-CNN [https://github.com/Zzh-tju/DIoU-pytorch-detectron](https://github.com/Zzh-tju/DIoU-pytorch-detectron)
4. Simulation experiments [https://github.com/Zzh-tju/DIoU](https://github.com/Zzh-tju/DIoU)
# YOLACT
### Code locations and options
See the `ciou` function in [layers/modules/multibox_loss.py](layers/modules/multibox_loss.py) for our PyTorch implementation of the CIoU loss.
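For reference, a minimal sketch of what a CIoU loss computes (1 - IoU, plus a normalized center-distance term and an aspect-ratio consistency term). This illustrates the formula from the papers above and is not a copy of the repo's `ciou` function:
```python
import math
import torch

def ciou_loss(pred, target, eps=1e-7):
    """pred, target: [n, 4] boxes in (x1, y1, x2, y2)."""
    px1, py1, px2, py2 = pred.unbind(-1)
    tx1, ty1, tx2, ty2 = target.unbind(-1)

    # IoU
    inter = (torch.min(px2, tx2) - torch.max(px1, tx1)).clamp(min=0) * \
            (torch.min(py2, ty2) - torch.max(py1, ty1)).clamp(min=0)
    union = (px2 - px1) * (py2 - py1) + (tx2 - tx1) * (ty2 - ty1) - inter + eps
    iou = inter / union

    # squared center distance over squared diagonal of the smallest enclosing box
    cw = torch.max(px2, tx2) - torch.min(px1, tx1)
    ch = torch.max(py2, ty2) - torch.min(py1, ty1)
    c2 = cw ** 2 + ch ** 2 + eps
    rho2 = ((px1 + px2 - tx1 - tx2) ** 2 + (py1 + py2 - ty1 - ty2) ** 2) / 4

    # aspect-ratio consistency term
    v = (4 / math.pi ** 2) * (torch.atan((tx2 - tx1) / (ty2 - ty1 + eps)) -
                              torch.atan((px2 - px1) / (py2 - py1 + eps))) ** 2
    with torch.no_grad():
        alpha = v / ((1 - iou) + v + eps)
    return 1 - iou + rho2 / c2 + alpha * v
```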
NMS currently supports two modes (see [eval.py](eval.py)):
1. Cross-class mode, which ignores classes, i.e., all classes are processed together. Set `cross_class_nms=True`; this is a bit faster than per-class mode, but performance drops slightly.
2. Per-class mode, where each class is suppressed separately (`cross_class_nms=False`).
Currently, NMS supports the following settings: `fast_nms`, `cluster_nms`, `cluster_diounms`, `spm`, `spm_dist`, `spm_dist_weighted`.
See [layers/functions/detection.py](layers/functions/detection.py) for our PyTorch implementation of Cluster-NMS.
# Installation
To use YOLACT++, make sure you compile the DCNv2 code.
- Clone this repository and cd into it:
```Shell
git clone https://github.com/Zzh-tju/CIoU.git
cd yolact
```
- Set up the environment using one of the following methods:
- With [Anaconda](https://www.anaconda.com/distribution/)
- Run `conda env create -f environment.yml`
- With pip
- Set up a Python 3 environment (e.g., using virtualenv).
- Install [Pytorch](http://pytorch.org/) 1.0.1 (or higher) and TorchVision.
- Install some other packages:
```Shell
# Cython needs to be installed before pycocotools
pip install cython
pip install opencv-python pillow pycocotools matplotlib
```
- To train YOLACT, download the COCO 2017 dataset. Note that this script takes a while and dumps 21 GB of files into `./data/coco`.
```Shell
sh data/scripts/COCO.sh
```
- To evaluate YOLACT on COCO `test-dev`, download `test-dev`:
```Shell
sh data/scripts/COCO_test.sh
```
- To use YOLACT++, compile the DCN layer ([DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_1.0)).
```Shell
cd external/DCNv2
python setup.py build develop
```
# Evaluation
Below are our trained YOLACT models (released 2020.5.5) together with their FPS, evaluated on a single GTX 1080 Ti on the `coco 2017 val` set.
Training was performed on two GPUs with the following command:
`
python train.py --config=yolact_base_config --batch_size=8
`
| Image Size | Backbone | Loss | NMS | FPS | box AP | mask AP | Weights |
|:----:|:-------------:|:-------:|:----:|:----:|:----:|:----:|----------------------------------------------------------------------------------------------------------------------|
| 550 | Resnet101-FPN | SL1 | Fast NMS | 30.6 | 31.5 | 29.1 |[SL1.pth](https://share.weiyun.com/5N840Hm) |
| 550 | Resnet101-FPN | CIoU | Fast NMS | 30.6 | 32.1 | 29.6 | [CIoU.pth](https://share.weiyun.com/5EtJ4dJ) |
To evaluate a model, put the corresponding weights file in the `./weights` directory and run one of the following commands.
## Results on COCO
```
# Evaluate the model on the entire validation set. This outputs a COCOEval json file that you can submit to the COCO server (for test-dev) or evaluate directly with the run_coco_eval.py script (for val 2017).
# The following command creates './results/bbox_detections.json' and './results/mask_detections.json' for object detection and instance segmentation, respectively.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --output_coco_json
# Run the following command to evaluate the json files just generated.
python run_coco_eval.py
# To output a COCO json file for test-dev, make sure you have downloaded test-dev and then run
python eval.py --trained_model=weights/yolact_base_54_800000.pth --output_coco_json --dataset=coco2017_testdev_dataset
```
## Visualizing detection results on COCO
```
# A classification score threshold of 0.15 is typically used.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --display
```
## Cluster-NMS speed evaluation
```
python eval.py --trained_model=weights/yolact_base_54_800000.pth --benchmark
```
#### Hardware
- 1 GTX 1080 Ti
- Intel(R) Core(TM) i7-6850K CPU @ 3.60GHz
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:------------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | CIoU | Fast NMS |**30.6**| 32.1 | 33.9 | 43.0 | 29.6 | 30.9 | 40.3 |
| 550 | Resnet101-FPN | CIoU | Original NMS | 11.5 | 32.5 | 34.1 | 45.1 | 29.7 | 31.0 | 41.7 |
| 550 | Resnet101-FPN | CIoU | Cluster-NMS | 28.8 | 32.5 | 34.1 | 45.2 | 29.7 | 31.0 | 41.7 |
| 550 | Resnet101-FPN | CIoU | SPM Cluster-NMS | 28.6 | 33.1 | 35.2 | 48.8 |**30.3**|**31.7**| 43.6 |
| 550 | Resnet101-FPN | CIoU | SPM + Distance Cluster-NMS | 27.1 | 33.2 | 35.2 |**49.2**| 30.2 |**31.7**|**43.8**|
| 550 | Resnet101-FPN | CIoU | SPM + Distance + Weighted Cluster-NMS | 26.5 |**33.4**|**35.5**| 49.1 |**30.3**| 31.6 |**43.8**|
The following results use the official YOLACT pretrained weights. ([yolact_resnet50_54_800000.pth](https://ucdavis365-my.sharepoint.com/:u:/g/personal/yongjaelee_ucdavis_edu/EUVpxoSXaqNIlssoLKOEoCcB1m0RpzGq_Khp5n1VX3zcUw))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet50-FPN | SL1 | Fast NMS |**41.6**| 30.2 | 31.9 | 42.0 | 28.0 | 29.1 | 39.4 |
| 550 | Resnet50-FPN | SL1 | Original NMS | 12.8 | 30.7 | 32.0 | 44.1 | 28.1 | 29.2 | 40.7 |
| 550 | Resnet50-FPN | SL1 | Cluster-NMS | 38.2 | 30.7 | 32.0 | 44.1 | 28.1 | 29.2 | 40.7 |
| 550 | Resnet50-FPN | SL1 | SPM Cluster-NMS | 37.7 | 31.3 | 33.2 | 48.0 |**28.8**|**29.9**| 42.8 |
| 550 | Resnet50-FPN | SL1 | SPM + Distance Cluster-NMS | 35.2 | 31.3 | 33.3 | 48.2 | 28.7 |**29.9**| 42.9 |
| 550 | Resnet50-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 34.2 |**31.8**|**33.9**|**48.3**|**28.8**|**29.9**|**43.0**|
The following results use the official YOLACT pretrained weights. ([yolact_base_54_800000.pth](https://drive.google.com/file/d/1UYy3dMapbH1BnmtZU4WH1zbYgOzzHHf_/view?usp=sharing))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | SL1 | Fast NMS |**30.6**| 32.5 | 34.6 | 43.9 | 29.8 | 31.3 | 40.8 |
| 550 | Resnet101-FPN | SL1 | Original NMS | 11.9 | 32.9 | 34.8 | 45.8 | 29.9 | 31.4 | 42.1 |
| 550 | Resnet101-FPN | SL1 | Cluster-NMS | 29.2 | 32.9 | 34.8 | 45.9 | 29.9 | 31.4 | 42.1 |
| 550 | Resnet101-FPN | SL1 | SPM Cluster-NMS | 28.8 | 33.5 | 35.9 | 49.7 |**30.5**|**32.1**| 44.1 |
| 550 | Resnet101-FPN | SL1 | SPM + Distance Cluster-NMS | 27.5 | 33.5 | 35.9 |**50.2**| 30.4 | 32.0 |**44.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 26.7 |**34.0**|**36.6**| 49.9 |**30.5**| 32.0 |**44.3**|
The following are YOLACT++ results, also with the official pretrained weights. ([yolact_plus_base_54_800000.pth](https://ucdavis365-my.sharepoint.com/:u:/g/personal/yongjaelee_ucdavis_edu/EVQ62sF0SrJPrl_68onyHF8BpG7c05A8PavV4a849sZgEA))
| Image Size | Backbone | Loss | NMS | FPS | box AP | box AP75 | box AR100 | mask AP | mask AP75 | mask AR100 |
|:----:|:-------------:|:-------:|:-----------------------------------:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 550 | Resnet101-FPN | SL1 | Fast NMS |**25.1**| 35.8 | 38.7 | 45.5 | 34.4 | 36.8 | 42.6 |
| 550 | Resnet101-FPN | SL1 | Original NMS | 10.9 | 36.4 | 39.1 | 48.0 | 34.7 | 37.1 | 44.1 |
| 550 | Resnet101-FPN | SL1 | Cluster-NMS | 23.7 | 36.4 | 39.1 | 48.0 | 34.7 | 37.1 | 44.1 |
| 550 | Resnet101-FPN | SL1 | SPM Cluster-NMS | 23.2 | 36.9 | 40.1 | 52.8 |**35.0**| 37.5 |**46.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance Cluster-NMS | 22.0 | 36.9 | 40.2 |**53.0**| 34.9 | 37.5 |**46.3**|
| 550 | Resnet101-FPN | SL1 | SPM + Distance + Weighted Cluster-NMS | 21.7 |**37.4**|**40.6**| 52.5 |**35.0**|**37.6**|**46.3**|
#### Notes:
- We also tested the SPM + Distance + Weighted Cluster-NMS strategy with coordinate weighting applied only to boxes with `IoU>0.8`. We found that `IoU>0.5` is not good enough for YOLACT, while `IoU>0.9` is nearly identical to `SPM + Distance Cluster-NMS`. (See [CAD](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8265304) for more details on Weighted-NMS.)
- The Original NMS provided by the official YOLACT is faster than the one in this repository, because it first uses a classification score threshold (0.05) to filter out a large number of boxes; for YOLACT ResNet101-FPN this gives roughly 22~23 FPS, but performance drops. To keep the performance identical, we do not use this preliminary score threshold.
- Note that Torchvision NMS is the fastest, owing to its CUDA implementation and engineering optimizations (such as computing only the upper-triangular IoU matrix). Our Cluster-NMS needs fewer iterations and can also be further accelerated with such engineering.
- Torchvision NMS currently uses IoU rather than DIoU as the suppression criterion. However, directly replacing IoU with DIoU in Original NMS to obtain DIoU-NMS incurs a much larger computational cost, whereas Cluster-DIoU-NMS greatly accelerates DIoU-NMS while keeping exactly the same accuracy. (A sketch of the DIoU criterion follows these notes.)
- Torchvision NMS is a function in Torchvision>=0.3, while our Cluster-NMS can be applied to codebases with lower Torchvision versions or to other deep learning frameworks, as long as matrix operations are available. **No extra imports, no compilation, fewer iterations, fully GPU-accelerated, and better performance.**
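As an illustration of the DIoU criterion mentioned above (IoU penalized by the normalized center distance, DIoU = IoU - rho^2/c^2), here is a rough sketch of a pairwise DIoU matrix that could replace the plain IoU matrix in the Cluster-NMS loop; the function name is made up and this is not the repo's `cluster_diounms` code:
```python
import torch
from torchvision.ops import box_iou

def diou_matrix(boxes, eps=1e-7):
    """boxes: [n, 4] in (x1, y1, x2, y2); returns the pairwise DIoU matrix."""
    iou = box_iou(boxes, boxes)
    cx = (boxes[:, 0] + boxes[:, 2]) / 2
    cy = (boxes[:, 1] + boxes[:, 3]) / 2
    rho2 = (cx[:, None] - cx[None, :]) ** 2 + (cy[:, None] - cy[None, :]) ** 2
    # squared diagonal of the smallest box enclosing each pair
    ex1 = torch.min(boxes[:, None, 0], boxes[None, :, 0])
    ey1 = torch.min(boxes[:, None, 1], boxes[None, :, 1])
    ex2 = torch.max(boxes[:, None, 2], boxes[None, :, 2])
    ey2 = torch.max(boxes[:, None, 3], boxes[None, :, 3])
    c2 = (ex2 - ex1) ** 2 + (ey2 - ey1) ** 2 + eps
    return iou - rho2 / c2
```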
## Images
```Shell
# Run detection on a specific image with the following command.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=my_image.png
# Run detection on a specific image and save the result.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --image=input_image.png:output_image.png
# Run detection on all images in a folder.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --images=path/to/input/folder:path/to/output/folder
```
## Video
```Shell
# Process a video stream in real time. "--video_multiframe" will process that many frames at once for improved performance.
# Use "--display_fps" to draw the FPS directly on each frame.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=my_video.mp4
# Display a webcam feed in real time. If you have multiple webcams, pass the index of the webcam you want instead of 0.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=0
# Process a video and save it to another file.
python eval.py --trained_model=weights/yolact_base_54_800000.pth --score_threshold=0.15 --top_k=15 --video_multiframe=4 --video=input_video.mp4:output_video.mp4
```
As you can see, `eval.py` can do a lot. Add the `--help` flag to see everything it can do.
```Shell
python eval.py --help
```
# Training
We train on COCO by default.
- Grab an ImageNet-pretrained model and put it in the `./weights` directory.
- [resnet101_reducedfc.pth](https://drive.google.com/file/d/1tvqFPd4bJtakOlmn-uIA492g2qurRChj/view?usp=sharing).
- [resnet50-19c8e357.pth](https://drive.google.com/file/d/1Jy3yCdbatgXa5YYIdTCRrSV0S9V5g1rn/view?usp=sharing).
- [darknet53.pth](https://drive.google.com/file/d/17Y431j4sagFpSReuPNoFcj9h7azDTZFf/view?usp=sharing).
- Run one of the training commands below.
- Pressing ctrl+c during training saves an `*_interrupt.pth` checkpoint at the current iteration.
- All weights are saved in `./weights` with the file name `<config>_<epoch>_<iter>.pth`.
```Shell
# Train with the default batch size of 8 using the base config.
python train.py --config=yolact_base_config
# Train yolact_base_config with a batch_size of 5. For the 550px models, 1 batch takes up around 1.5 GB of VRAM, so set it accordingly.
python train.py --config=yolact_base_config --batch_size=5
# Resume training from the specified weights, starting at the iteration given in the file name.
python train.py --config=yolact_base_config --resume=weights/yolact_base_10_32100.pth --start_iter=-1
# Use the --help option to see a description of all available command-line arguments.
python train.py --help
```
## Multi-GPU
YOLACT supports multi-GPU training:
- Before running any of the scripts, run `export CUDA_VISIBLE_DEVICES=[gpus]`.
- Replace [gpus] with a comma-separated list of the GPU indices you want to use, e.g., 0,1,2,3.
- Do this even if you are only using one GPU.
- You can check the indices of your GPUs with `nvidia-smi`.
- Then, simply set the batch size to `8*num_gpus` in the training commands above. The training script will automatically scale the hyperparameters to the right values.
- If you have memory to spare you can increase the batch size further, but keep it a multiple of the number of GPUs you are using.
- To allocate a specific number of images to each GPU, use `--batch_alloc=[alloc]`, where [alloc] is a comma-separated list of the number of images on each GPU. The values must sum to `batch_size`.
## Acknowledgments
Thanks to [Daniel Bolya](https://github.com/dbolya/) for [YOLACT & YOLACT++](https://github.com/dbolya/yolact), an excellent work on real-time instance segmentation.
================================================
FILE: backbone.py
================================================
import torch
import torch.nn as nn
import pickle
from collections import OrderedDict
try:
from dcn_v2 import DCN
except ImportError:
def DCN(*args, **kwdargs):
raise Exception('DCN could not be imported. If you want to use YOLACT++ models, compile DCN. Check the README for instructions.')
class Bottleneck(nn.Module):
""" Adapted from torchvision.models.resnet """
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=nn.BatchNorm2d, dilation=1, use_dcn=False):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, dilation=dilation)
self.bn1 = norm_layer(planes)
if use_dcn:
self.conv2 = DCN(planes, planes, kernel_size=3, stride=stride,
padding=dilation, dilation=dilation, deformable_groups=1)
self.conv2.bias.data.zero_()
self.conv2.conv_offset_mask.weight.data.zero_()
self.conv2.conv_offset_mask.bias.data.zero_()
else:
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=dilation, bias=False, dilation=dilation)
self.bn2 = norm_layer(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False, dilation=dilation)
self.bn3 = norm_layer(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNetBackbone(nn.Module):
""" Adapted from torchvision.models.resnet """
def __init__(self, layers, dcn_layers=[0, 0, 0, 0], dcn_interval=1, atrous_layers=[], block=Bottleneck, norm_layer=nn.BatchNorm2d):
super().__init__()
# These will be populated by _make_layer
self.num_base_layers = len(layers)
self.layers = nn.ModuleList()
self.channels = []
self.norm_layer = norm_layer
self.dilation = 1
self.atrous_layers = atrous_layers
# From torchvision.models.resnet.Resnet
self.inplanes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = norm_layer(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self._make_layer(block, 64, layers[0], dcn_layers=dcn_layers[0], dcn_interval=dcn_interval)
self._make_layer(block, 128, layers[1], stride=2, dcn_layers=dcn_layers[1], dcn_interval=dcn_interval)
self._make_layer(block, 256, layers[2], stride=2, dcn_layers=dcn_layers[2], dcn_interval=dcn_interval)
self._make_layer(block, 512, layers[3], stride=2, dcn_layers=dcn_layers[3], dcn_interval=dcn_interval)
# This contains every module that should be initialized by loading in pretrained weights.
# Any extra layers added onto this that won't be initialized by init_backbone will not be
# in this list. That way, Yolact::init_weights knows which backbone weights to initialize
# with xavier, and which ones to leave alone.
self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
def _make_layer(self, block, planes, blocks, stride=1, dcn_layers=0, dcn_interval=1):
""" Here one layer means a string of n Bottleneck blocks. """
downsample = None
# This is actually just to create the connection between layers, and not necessarily to
# downsample. Even if the second condition is met, it only downsamples when stride != 1
if stride != 1 or self.inplanes != planes * block.expansion:
if len(self.layers) in self.atrous_layers:
self.dilation += 1
stride = 1
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False,
dilation=self.dilation),
self.norm_layer(planes * block.expansion),
)
layers = []
use_dcn = (dcn_layers >= blocks)
layers.append(block(self.inplanes, planes, stride, downsample, self.norm_layer, self.dilation, use_dcn=use_dcn))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
use_dcn = ((i+dcn_layers) >= blocks) and (i % dcn_interval == 0)
layers.append(block(self.inplanes, planes, norm_layer=self.norm_layer, use_dcn=use_dcn))
layer = nn.Sequential(*layers)
self.channels.append(planes * block.expansion)
self.layers.append(layer)
return layer
def forward(self, x):
""" Returns a list of convouts for each layer. """
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
outs = []
for layer in self.layers:
x = layer(x)
outs.append(x)
return tuple(outs)
def init_backbone(self, path):
""" Initializes the backbone weights for training. """
state_dict = torch.load(path)
# Replace layer1 -> layers.0 etc.
keys = list(state_dict)
for key in keys:
if key.startswith('layer'):
idx = int(key[5])
new_key = 'layers.' + str(idx-1) + key[6:]
state_dict[new_key] = state_dict.pop(key)
# Note: Using strict=False is berry scary. Triple check this.
self.load_state_dict(state_dict, strict=False)
def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=Bottleneck):
""" Add a downsample layer to the backbone as per what SSD does. """
self._make_layer(block, conv_channels // block.expansion, blocks=depth, stride=downsample)
class ResNetBackboneGN(ResNetBackbone):
def __init__(self, layers, num_groups=32):
super().__init__(layers, norm_layer=lambda x: nn.GroupNorm(num_groups, x))
def init_backbone(self, path):
""" The path here comes from detectron. So we load it differently. """
with open(path, 'rb') as f:
state_dict = pickle.load(f, encoding='latin1') # From the detectron source
state_dict = state_dict['blobs']
our_state_dict_keys = list(self.state_dict().keys())
new_state_dict = {}
gn_trans = lambda x: ('gn_s' if x == 'weight' else 'gn_b')
layeridx2res = lambda x: 'res' + str(int(x)+2)
block2branch = lambda x: 'branch2' + ('a', 'b', 'c')[int(x[-1:])-1]
# Transcribe each Detectron weights name to a Yolact weights name
for key in our_state_dict_keys:
parts = key.split('.')
transcribed_key = ''
if (parts[0] == 'conv1'):
transcribed_key = 'conv1_w'
elif (parts[0] == 'bn1'):
transcribed_key = 'conv1_' + gn_trans(parts[1])
elif (parts[0] == 'layers'):
if int(parts[1]) >= self.num_base_layers: continue
transcribed_key = layeridx2res(parts[1])
transcribed_key += '_' + parts[2] + '_'
if parts[3] == 'downsample':
transcribed_key += 'branch1_'
if parts[4] == '0':
transcribed_key += 'w'
else:
transcribed_key += gn_trans(parts[5])
else:
transcribed_key += block2branch(parts[3]) + '_'
if 'conv' in parts[3]:
transcribed_key += 'w'
else:
transcribed_key += gn_trans(parts[4])
new_state_dict[key] = torch.Tensor(state_dict[transcribed_key])
# strict=False because we may have extra uninitialized layers at this point
self.load_state_dict(new_state_dict, strict=False)
def darknetconvlayer(in_channels, out_channels, *args, **kwdargs):
"""
Implements a conv, activation, then batch norm.
Arguments are passed into the conv layer.
"""
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, *args, **kwdargs, bias=False),
nn.BatchNorm2d(out_channels),
# Darknet uses 0.1 here.
# See https://github.com/pjreddie/darknet/blob/680d3bde1924c8ee2d1c1dea54d3e56a05ca9a26/src/activations.h#L39
nn.LeakyReLU(0.1, inplace=True)
)
class DarkNetBlock(nn.Module):
""" Note: channels is the lesser of the two. The output will be expansion * channels. """
expansion = 2
def __init__(self, in_channels, channels):
super().__init__()
self.conv1 = darknetconvlayer(in_channels, channels, kernel_size=1)
self.conv2 = darknetconvlayer(channels, channels * self.expansion, kernel_size=3, padding=1)
def forward(self, x):
return self.conv2(self.conv1(x)) + x
class DarkNetBackbone(nn.Module):
"""
An implementation of YOLOv3's Darknet53 in
https://pjreddie.com/media/files/papers/YOLOv3.pdf
This is based off of the implementation of Resnet above.
"""
def __init__(self, layers=[1, 2, 8, 8, 4], block=DarkNetBlock):
super().__init__()
# These will be populated by _make_layer
self.num_base_layers = len(layers)
self.layers = nn.ModuleList()
self.channels = []
self._preconv = darknetconvlayer(3, 32, kernel_size=3, padding=1)
self.in_channels = 32
self._make_layer(block, 32, layers[0])
self._make_layer(block, 64, layers[1])
self._make_layer(block, 128, layers[2])
self._make_layer(block, 256, layers[3])
self._make_layer(block, 512, layers[4])
# This contains every module that should be initialized by loading in pretrained weights.
# Any extra layers added onto this that won't be initialized by init_backbone will not be
# in this list. That way, Yolact::init_weights knows which backbone weights to initialize
# with xavier, and which ones to leave alone.
self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
def _make_layer(self, block, channels, num_blocks, stride=2):
""" Here one layer means a string of n blocks. """
layer_list = []
# The downsample layer
layer_list.append(
darknetconvlayer(self.in_channels, channels * block.expansion,
kernel_size=3, padding=1, stride=stride))
# Each block inputs channels and outputs channels * expansion
self.in_channels = channels * block.expansion
layer_list += [block(self.in_channels, channels) for _ in range(num_blocks)]
self.channels.append(self.in_channels)
self.layers.append(nn.Sequential(*layer_list))
def forward(self, x):
""" Returns a list of convouts for each layer. """
x = self._preconv(x)
outs = []
for layer in self.layers:
x = layer(x)
outs.append(x)
return tuple(outs)
def add_layer(self, conv_channels=1024, stride=2, depth=1, block=DarkNetBlock):
""" Add a downsample layer to the backbone as per what SSD does. """
self._make_layer(block, conv_channels // block.expansion, num_blocks=depth, stride=stride)
def init_backbone(self, path):
""" Initializes the backbone weights for training. """
# Note: Using strict=False is berry scary. Triple check this.
self.load_state_dict(torch.load(path), strict=False)
class VGGBackbone(nn.Module):
"""
Args:
- cfg: A list of layers given as lists. Layers can be either 'M' signifying
a max pooling layer, a number signifying that many feature maps in
a conv layer, or a tuple of 'M' or a number and a kwdargs dict to pass
into the function that creates the layer (e.g. nn.MaxPool2d for 'M').
- extra_args: A list of lists of arguments to pass into add_layer.
- norm_layers: Layers indices that need to pass through an l2norm layer.
"""
def __init__(self, cfg, extra_args=[], norm_layers=[]):
super().__init__()
self.channels = []
self.layers = nn.ModuleList()
self.in_channels = 3
self.extra_args = list(reversed(extra_args)) # So I can use it as a stack
# Keeps track of what the corresponding key will be in the state dict of the
# pretrained model. For instance, layers.0.2 for us is 2 for the pretrained
# model but layers.1.1 is 5.
self.total_layer_count = 0
self.state_dict_lookup = {}
for idx, layer_cfg in enumerate(cfg):
self._make_layer(layer_cfg)
self.norms = nn.ModuleList([nn.BatchNorm2d(self.channels[l]) for l in norm_layers])
self.norm_lookup = {l: idx for idx, l in enumerate(norm_layers)}
# These modules will be initialized by init_backbone,
# so don't overwrite their initialization later.
self.backbone_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
def _make_layer(self, cfg):
"""
Each layer is a sequence of conv layers usually preceded by a max pooling.
Adapted from torchvision.models.vgg.make_layers.
"""
layers = []
for v in cfg:
# VGG in SSD requires some special layers, so allow layers to be tuples of
# (<M or num_features>, kwdargs dict)
args = None
if isinstance(v, tuple):
args = v[1]
v = v[0]
# v should be either M or a number
if v == 'M':
# Set default arguments
if args is None:
args = {'kernel_size': 2, 'stride': 2}
layers.append(nn.MaxPool2d(**args))
else:
# See the comment in __init__ for an explanation of this
cur_layer_idx = self.total_layer_count + len(layers)
self.state_dict_lookup[cur_layer_idx] = '%d.%d' % (len(self.layers), len(layers))
# Set default arguments
if args is None:
args = {'kernel_size': 3, 'padding': 1}
# Add the layers
layers.append(nn.Conv2d(self.in_channels, v, **args))
layers.append(nn.ReLU(inplace=True))
self.in_channels = v
self.total_layer_count += len(layers)
self.channels.append(self.in_channels)
self.layers.append(nn.Sequential(*layers))
def forward(self, x):
""" Returns a list of convouts for each layer. """
outs = []
for idx, layer in enumerate(self.layers):
x = layer(x)
# Apply an l2norm module to the selected layers
# Note that this differs from the original implementation
if idx in self.norm_lookup:
x = self.norms[self.norm_lookup[idx]](x)
outs.append(x)
return tuple(outs)
def transform_key(self, k):
""" Transform e.g. features.24.bias to layers.4.1.bias """
vals = k.split('.')
layerIdx = self.state_dict_lookup[int(vals[0])]
return 'layers.%s.%s' % (layerIdx, vals[1])
def init_backbone(self, path):
""" Initializes the backbone weights for training. """
state_dict = torch.load(path)
state_dict = OrderedDict([(self.transform_key(k), v) for k,v in state_dict.items()])
self.load_state_dict(state_dict, strict=False)
def add_layer(self, conv_channels=128, downsample=2):
""" Add a downsample layer to the backbone as per what SSD does. """
if len(self.extra_args) > 0:
conv_channels, downsample = self.extra_args.pop()
padding = 1 if downsample > 1 else 0
layer = nn.Sequential(
nn.Conv2d(self.in_channels, conv_channels, kernel_size=1),
nn.ReLU(inplace=True),
nn.Conv2d(conv_channels, conv_channels*2, kernel_size=3, stride=downsample, padding=padding),
nn.ReLU(inplace=True)
)
self.in_channels = conv_channels*2
self.channels.append(self.in_channels)
self.layers.append(layer)
def construct_backbone(cfg):
""" Constructs a backbone given a backbone config object (see config.py). """
backbone = cfg.type(*cfg.args)
# Add downsampling layers until we reach the number we need
num_layers = max(cfg.selected_layers) + 1
while len(backbone.layers) < num_layers:
backbone.add_layer()
return backbone
================================================
FILE: data/__init__.py
================================================
from .config import *
from .coco import *
import torch
import cv2
import numpy as np
================================================
FILE: data/coco.py
================================================
import os
import os.path as osp
import sys
import torch
import torch.utils.data as data
import torch.nn.functional as F
import cv2
import numpy as np
from .config import cfg
from pycocotools import mask as maskUtils
import random
def get_label_map():
if cfg.dataset.label_map is None:
return {x+1: x+1 for x in range(len(cfg.dataset.class_names))}
else:
return cfg.dataset.label_map
class COCOAnnotationTransform(object):
"""Transforms a COCO annotation into a Tensor of bbox coords and label index
Initialized with a dictionary lookup of classnames to indexes
"""
def __init__(self):
self.label_map = get_label_map()
def __call__(self, target, width, height):
"""
Args:
target (dict): COCO target json annotation as a python dict
height (int): height
width (int): width
Returns:
a list containing lists of bounding boxes [bbox coords, class idx]
"""
scale = np.array([width, height, width, height])
res = []
for obj in target:
if 'bbox' in obj:
bbox = obj['bbox']
label_idx = obj['category_id']
if label_idx >= 0:
label_idx = self.label_map[label_idx] - 1
final_box = list(np.array([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]])/scale)
final_box.append(label_idx)
res += [final_box] # [xmin, ymin, xmax, ymax, label_idx]
else:
print("No bbox found for object ", obj)
return res
class COCODetection(data.Dataset):
"""`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
Args:
root (string): Root directory where images are downloaded to.
set_name (string): Name of the specific set of COCO images.
transform (callable, optional): A function/transform that augments the
raw images`
target_transform (callable, optional): A function/transform that takes
in the target (bbox) and transforms it.
prep_crowds (bool): Whether or not to prepare crowds for the evaluation step.
"""
def __init__(self, image_path, info_file, transform=None,
target_transform=None,
dataset_name='MS COCO', has_gt=True):
# Do this here because we have too many things named COCO
from pycocotools.coco import COCO
if target_transform is None:
target_transform = COCOAnnotationTransform()
self.root = image_path
self.coco = COCO(info_file)
self.ids = list(self.coco.imgToAnns.keys())
if len(self.ids) == 0 or not has_gt:
self.ids = list(self.coco.imgs.keys())
self.transform = transform
self.target_transform = COCOAnnotationTransform()
self.name = dataset_name
self.has_gt = has_gt
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: Tuple (image, (target, masks, num_crowds)).
target is the object returned by ``coco.loadAnns``.
"""
im, gt, masks, h, w, num_crowds = self.pull_item(index)
return im, (gt, masks, num_crowds)
def __len__(self):
return len(self.ids)
def pull_item(self, index):
"""
Args:
index (int): Index
Returns:
tuple: Tuple (image, target, masks, height, width, crowd).
target is the object returned by ``coco.loadAnns``.
Note that if no crowd annotations exist, crowd will be None
"""
img_id = self.ids[index]
if self.has_gt:
ann_ids = self.coco.getAnnIds(imgIds=img_id)
# Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
target = [x for x in self.coco.loadAnns(ann_ids) if x['image_id'] == img_id]
else:
target = []
# Separate out crowd annotations. These are annotations that signify a large crowd of
# objects of said class, where there is no annotation for each individual object. Both
# during testing and training, consider these crowds as neutral.
crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
num_crowds = len(crowd)
for x in crowd:
x['category_id'] = -1
# This is so we ensure that all crowd annotations are at the end of the array
target += crowd
# The split here is to have compatibility with both COCO2014 and 2017 annotations.
# In 2014, images have the pattern COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg.
# Our script downloads the images as %012d.jpg so convert accordingly.
file_name = self.coco.loadImgs(img_id)[0]['file_name']
if file_name.startswith('COCO'):
file_name = file_name.split('_')[-1]
path = osp.join(self.root, file_name)
assert osp.exists(path), 'Image path does not exist: {}'.format(path)
img = cv2.imread(path)
height, width, _ = img.shape
if len(target) > 0:
# Pool all the masks for this image into one [num_objects,height,width] matrix
masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
masks = np.vstack(masks)
masks = masks.reshape(-1, height, width)
if self.target_transform is not None and len(target) > 0:
target = self.target_transform(target, width, height)
if self.transform is not None:
if len(target) > 0:
target = np.array(target)
img, masks, boxes, labels = self.transform(img, masks, target[:, :4],
{'num_crowds': num_crowds, 'labels': target[:, 4]})
# I stored num_crowds in labels so I didn't have to modify the entirety of augmentations
num_crowds = labels['num_crowds']
labels = labels['labels']
target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
else:
img, _, _, _ = self.transform(img, np.zeros((1, height, width), dtype=np.float64), np.array([[0, 0, 1, 1]]),
{'num_crowds': 0, 'labels': np.array([0])})
masks = None
target = None
if target.shape[0] == 0:
print('Warning: Augmentation output an example with no ground truth. Resampling...')
return self.pull_item(random.randint(0, len(self.ids)-1))
return torch.from_numpy(img).permute(2, 0, 1), target, masks, height, width, num_crowds
def pull_image(self, index):
'''Returns the original image object at index in PIL form
Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.
Argument:
index (int): index of img to show
Return:
cv2 img
'''
img_id = self.ids[index]
path = self.coco.loadImgs(img_id)[0]['file_name']
return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR)
def pull_anno(self, index):
'''Returns the original annotation of image at index
Note: not using self.__getitem__(), as any transformations passed in
could mess up this functionality.
Argument:
index (int): index of img to get annotation of
Return:
list: [img_id, [(label, bbox coords),...]]
eg: ('001718', [('dog', (96, 13, 438, 332))])
'''
img_id = self.ids[index]
ann_ids = self.coco.getAnnIds(imgIds=img_id)
return self.coco.loadAnns(ann_ids)
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
tmp = ' Target Transforms (if any): '
fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
def enforce_size(img, targets, masks, num_crowds, new_w, new_h):
""" Ensures that the image is the given size without distorting aspect ratio. """
with torch.no_grad():
_, h, w = img.size()
if h == new_h and w == new_w:
return img, targets, masks, num_crowds
# Resize the image so that it fits within new_w, new_h
w_prime = new_w
h_prime = h * new_w / w
if h_prime > new_h:
w_prime *= new_h / h_prime
h_prime = new_h
w_prime = int(w_prime)
h_prime = int(h_prime)
# Do all the resizing
img = F.interpolate(img.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False)
img.squeeze_(0)
# Act like each object is a color channel
masks = F.interpolate(masks.unsqueeze(0), (h_prime, w_prime), mode='bilinear', align_corners=False)
masks.squeeze_(0)
# Scale bounding boxes (this will put them in the top left corner in the case of padding)
targets[:, [0, 2]] *= (w_prime / new_w)
targets[:, [1, 3]] *= (h_prime / new_h)
# Finally, pad everything to be the new_w, new_h
pad_dims = (0, new_w - w_prime, 0, new_h - h_prime)
img = F.pad( img, pad_dims, mode='constant', value=0)
masks = F.pad(masks, pad_dims, mode='constant', value=0)
return img, targets, masks, num_crowds
def detection_collate(batch):
"""Custom collate fn for dealing with batches of images that have a different
number of associated object annotations (bounding boxes).
Arguments:
batch: (tuple) A tuple of tensor images and (lists of annotations, masks)
Return:
A tuple containing:
1) (tensor) batch of images stacked on their 0 dim
2) (list<tensor>, list<tensor>, list<int>) annotations for a given image are stacked
on 0 dim. The output gt is a tuple of annotations and masks.
"""
targets = []
imgs = []
masks = []
num_crowds = []
for sample in batch:
imgs.append(sample[0])
targets.append(torch.FloatTensor(sample[1][0]))
masks.append(torch.FloatTensor(sample[1][1]))
num_crowds.append(sample[1][2])
return imgs, (targets, masks, num_crowds)
================================================
FILE: data/config.py
================================================
from backbone import ResNetBackbone, VGGBackbone, ResNetBackboneGN, DarkNetBackbone
from math import sqrt
import torch
# for making bounding boxes pretty
COLORS = ((244, 67, 54),
(233, 30, 99),
(156, 39, 176),
(103, 58, 183),
( 63, 81, 181),
( 33, 150, 243),
( 3, 169, 244),
( 0, 188, 212),
( 0, 150, 136),
( 76, 175, 80),
(139, 195, 74),
(205, 220, 57),
(255, 235, 59),
(255, 193, 7),
(255, 152, 0),
(255, 87, 34),
(121, 85, 72),
(158, 158, 158),
( 96, 125, 139))
# These are in BGR and are for ImageNet
MEANS = (103.94, 116.78, 123.68)
STD = (57.38, 57.12, 58.40)
COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush')
COCO_LABEL_MAP = { 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16,
18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24,
27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32,
37: 33, 38: 34, 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40,
46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48,
54: 49, 55: 50, 56: 51, 57: 52, 58: 53, 59: 54, 60: 55, 61: 56,
62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64,
74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, 81: 72,
82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80}
# ----------------------- CONFIG CLASS ----------------------- #
class Config(object):
"""
Holds the configuration for anything you want it to.
To get the currently active config, call get_cfg().
To use, just do cfg.x instead of cfg['x'].
I made this because doing cfg['x'] all the time is dumb.
"""
def __init__(self, config_dict):
for key, val in config_dict.items():
self.__setattr__(key, val)
def copy(self, new_config_dict={}):
"""
Copies this config into a new config object, making
the changes given by new_config_dict.
"""
ret = Config(vars(self))
for key, val in new_config_dict.items():
ret.__setattr__(key, val)
return ret
def replace(self, new_config_dict):
"""
Copies new_config_dict into this config object.
Note: new_config_dict can also be a config object.
"""
if isinstance(new_config_dict, Config):
new_config_dict = vars(new_config_dict)
for key, val in new_config_dict.items():
self.__setattr__(key, val)
def print(self):
for k, v in vars(self).items():
print(k, ' = ', v)
# ----------------------- DATASETS ----------------------- #
dataset_base = Config({
'name': 'Base Dataset',
# Training images and annotations
'train_images': './data/coco/images/',
'train_info': 'path_to_annotation_file',
# Validation images and annotations.
'valid_images': './data/coco/images/',
'valid_info': 'path_to_annotation_file',
# Whether or not to load GT. If this is False, eval.py quantitative evaluation won't work.
'has_gt': True,
# A list of names for each of your classes.
'class_names': COCO_CLASSES,
# COCO class ids aren't sequential, so this is a bandage fix. If your ids aren't sequential,
# provide a map from category_id -> index in class_names + 1 (the +1 is there because it's 1-indexed).
# If not specified, this just assumes category ids start at 1 and increase sequentially.
'label_map': None
})
coco2014_dataset = dataset_base.copy({
'name': 'COCO 2014',
'train_info': './data/coco/annotations/instances_train2014.json',
'valid_info': './data/coco/annotations/instances_val2014.json',
'label_map': COCO_LABEL_MAP
})
coco2017_dataset = dataset_base.copy({
'name': 'COCO 2017',
'train_info': './data/coco/annotations/instances_train2017.json',
'valid_info': './data/coco/annotations/instances_val2017.json',
'label_map': COCO_LABEL_MAP
})
coco2017_testdev_dataset = dataset_base.copy({
'name': 'COCO 2017 Test-Dev',
'valid_info': './data/coco/annotations/image_info_test-dev2017.json',
'has_gt': False,
'label_map': COCO_LABEL_MAP
})
PASCAL_CLASSES = ("aeroplane", "bicycle", "bird", "boat", "bottle",
"bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor")
pascal_sbd_dataset = dataset_base.copy({
'name': 'Pascal SBD 2012',
'train_images': './data/sbd/img',
'valid_images': './data/sbd/img',
'train_info': './data/sbd/pascal_sbd_train.json',
'valid_info': './data/sbd/pascal_sbd_val.json',
'class_names': PASCAL_CLASSES,
})
# ----------------------- TRANSFORMS ----------------------- #
resnet_transform = Config({
'channel_order': 'RGB',
'normalize': True,
'subtract_means': False,
'to_float': False,
})
vgg_transform = Config({
# Note that though vgg is traditionally BGR,
# the channel order of vgg_reducedfc.pth is RGB.
'channel_order': 'RGB',
'normalize': False,
'subtract_means': True,
'to_float': False,
})
darknet_transform = Config({
'channel_order': 'RGB',
'normalize': False,
'subtract_means': False,
'to_float': True,
})
# ----------------------- BACKBONES ----------------------- #
backbone_base = Config({
'name': 'Base Backbone',
'path': 'path/to/pretrained/weights',
'type': object,
'args': tuple(),
'transform': resnet_transform,
'selected_layers': list(),
'pred_scales': list(),
'pred_aspect_ratios': list(),
'use_pixel_scales': False,
'preapply_sqrt': True,
'use_square_anchors': False,
})
resnet101_backbone = backbone_base.copy({
'name': 'ResNet101',
'path': 'resnet101_reducedfc.pth',
'type': ResNetBackbone,
'args': ([3, 4, 23, 3],),
'transform': resnet_transform,
'selected_layers': list(range(2, 8)),
'pred_scales': [[1]]*6,
'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6,
})
resnet101_gn_backbone = backbone_base.copy({
'name': 'ResNet101_GN',
'path': 'R-101-GN.pkl',
'type': ResNetBackboneGN,
'args': ([3, 4, 23, 3],),
'transform': resnet_transform,
'selected_layers': list(range(2, 8)),
'pred_scales': [[1]]*6,
'pred_aspect_ratios': [ [[0.66685089, 1.7073535, 0.87508774, 1.16524493, 0.49059086]] ] * 6,
})
resnet101_dcn_inter3_backbone = resnet101_backbone.copy({
'name': 'ResNet101_DCN_Interval3',
'args': ([3, 4, 23, 3], [0, 4, 23, 3], 3),
})
resnet50_backbone = resnet101_backbone.copy({
'name': 'ResNet50',
'path': 'resnet50-19c8e357.pth',
'type': ResNetBackbone,
'args': ([3, 4, 6, 3],),
'transform': resnet_transform,
})
resnet50_dcnv2_backbone = resnet50_backbone.copy({
'name': 'ResNet50_DCNv2',
'args': ([3, 4, 6, 3], [0, 4, 6, 3]),
})
darknet53_backbone = backbone_base.copy({
'name': 'DarkNet53',
'path': 'darknet53.pth',
'type': DarkNetBackbone,
'args': ([1, 2, 8, 8, 4],),
'transform': darknet_transform,
'selected_layers': list(range(3, 9)),
'pred_scales': [[3.5, 4.95], [3.6, 4.90], [3.3, 4.02], [2.7, 3.10], [2.1, 2.37], [1.8, 1.92]],
'pred_aspect_ratios': [ [[1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n], [1]] for n in [3, 5, 5, 5, 3, 3] ],
})
vgg16_arch = [[64, 64],
[ 'M', 128, 128],
[ 'M', 256, 256, 256],
[('M', {'kernel_size': 2, 'stride': 2, 'ceil_mode': True}), 512, 512, 512],
[ 'M', 512, 512, 512],
[('M', {'kernel_size': 3, 'stride': 1, 'padding': 1}),
(1024, {'kernel_size': 3, 'padding': 6, 'dilation': 6}),
(1024, {'kernel_size': 1})]]
vgg16_backbone = backbone_base.copy({
'name': 'VGG16',
'path': 'vgg16_reducedfc.pth',
'type': VGGBackbone,
'args': (vgg16_arch, [(256, 2), (128, 2), (128, 1), (128, 1)], [3]),
'transform': vgg_transform,
'selected_layers': [3] + list(range(5, 10)),
'pred_scales': [[5, 4]]*6,
'pred_aspect_ratios': [ [[1], [1, sqrt(2), 1/sqrt(2), sqrt(3), 1/sqrt(3)][:n]] for n in [3, 5, 5, 5, 3, 3] ],
})
# ----------------------- MASK BRANCH TYPES ----------------------- #
mask_type = Config({
# Direct produces masks directly as the output of each pred module.
# This is denoted as fc-mask in the paper.
# Parameters: mask_size, use_gt_bboxes
'direct': 0,
# Lincomb produces coefficients as the output of each pred module then uses those coefficients
# to linearly combine features from a prototype network to create image-sized masks.
# Parameters:
# - masks_to_train (int): Since we're producing (near) full image masks, it'd take too much
# vram to backprop on every single mask. Thus we select only a subset.
# - mask_proto_src (int): The input layer to the mask prototype generation network. This is an
# index in backbone.layers. Use None to use the image itself instead.
# - mask_proto_net (list<tuple>): A list of layers in the mask proto network with the last one
# being where the masks are taken from. Each conv layer is in
# the form (num_features, kernel_size, **kwdargs). An empty
# list means to use the source for prototype masks. If the
# kernel_size is negative, this creates a deconv layer instead.
# If the kernel_size is negative and the num_features is None,
# this creates a simple bilinear interpolation layer instead.
# - mask_proto_bias (bool): Whether to include an extra coefficient that corresponds to a proto
# mask of all ones.
# - mask_proto_prototype_activation (func): The activation to apply to each prototype mask.
# - mask_proto_mask_activation (func): After summing the prototype masks with the predicted
# coeffs, what activation to apply to the final mask.
# - mask_proto_coeff_activation (func): The activation to apply to the mask coefficients.
# - mask_proto_crop (bool): If True, crop the mask with the predicted bbox during training.
# - mask_proto_crop_expand (float): If cropping, the percent to expand the cropping bbox by
# in each direction. This is to make the model less reliant
# on perfect bbox predictions.
# - mask_proto_loss (str [l1|disj]): If not None, apply an l1 or disjunctive regularization
# loss directly to the prototype masks.
# - mask_proto_binarize_downsampled_gt (bool): Binarize GT after downsampling during training?
# - mask_proto_normalize_mask_loss_by_sqrt_area (bool): Whether to normalize mask loss by sqrt(sum(gt))
# - mask_proto_reweight_mask_loss (bool): Reweight mask loss such that background is divided by
# #background and foreground is divided by #foreground.
# - mask_proto_grid_file (str): The path to the grid file to use with the next option.
# This should be a numpy.dump file with shape [numgrids, h, w]
# where h and w are w.r.t. the mask_proto_src convout.
# - mask_proto_use_grid (bool): Whether to add extra grid features to the proto_net input.
# - mask_proto_coeff_gate (bool): Add an extra set of sigmoided coefficients that is multiplied
# into the predicted coefficients in order to "gate" them.
# - mask_proto_prototypes_as_features (bool): For each prediction module, downsample the prototypes
# to the convout size of that module and supply the prototypes as input
# in addition to the already supplied backbone features.
# - mask_proto_prototypes_as_features_no_grad (bool): If the above is set, don't backprop gradients to
# to the prototypes from the network head.
# - mask_proto_remove_empty_masks (bool): Remove masks that are downsampled to 0 during loss calculations.
# - mask_proto_reweight_coeff (float): The coefficient to multiply the foreground pixels by if reweighting.
# - mask_proto_coeff_diversity_loss (bool): Apply coefficient diversity loss on the coefficients so that the same
# instance has similar coefficients.
# - mask_proto_coeff_diversity_alpha (float): The weight to use for the coefficient diversity loss.
# - mask_proto_normalize_emulate_roi_pooling (bool): Normalize the mask loss to emulate roi pooling's effect on the loss.
# - mask_proto_double_loss (bool): Whether to use the old loss in addition to any special new losses.
# - mask_proto_double_loss_alpha (float): The alpha to weight the above loss.
# - mask_proto_split_prototypes_by_head (bool): If true, this will give each prediction head its own prototypes.
# - mask_proto_crop_with_pred_box (bool): Whether to crop with the predicted box or the gt box.
'lincomb': 1,
})
# ----------------------- ACTIVATION FUNCTIONS ----------------------- #
activation_func = Config({
'tanh': torch.tanh,
'sigmoid': torch.sigmoid,
'softmax': lambda x: torch.nn.functional.softmax(x, dim=-1),
'relu': lambda x: torch.nn.functional.relu(x, inplace=True),
'none': lambda x: x,
})
# ----------------------- FPN DEFAULTS ----------------------- #
fpn_base = Config({
# The number of features to have in each FPN layer
'num_features': 256,
# The upsampling mode used
'interpolation_mode': 'bilinear',
# The number of extra layers to be produced by downsampling starting at P5
'num_downsample': 1,
# Whether to down sample with a 3x3 stride 2 conv layer instead of just a stride 2 selection
'use_conv_downsample': False,
# Whether to pad the pred layers with 1 on each side (I forgot to add this at the start)
# This is just here for backwards compatibility
'pad': True,
# Whether to add relu to the downsampled layers.
'relu_downsample_layers': False,
# Whether to add relu to the regular layers
'relu_pred_layers': True,
})
# ----------------------- CONFIG DEFAULTS ----------------------- #
coco_base_config = Config({
'dataset': coco2014_dataset,
'num_classes': 81, # This should include the background class
'max_iter': 400000,
# The maximum number of detections for evaluation
'max_num_detections': 100,
# dw' = momentum * dw - lr * (grad + decay * w)
'lr': 1e-3,
'momentum': 0.9,
'decay': 5e-4,
# For each lr step, what to multiply the lr with
'gamma': 0.1,
'lr_steps': (280000, 360000, 400000),
# Initial learning rate to linearly warmup from (if until > 0)
'lr_warmup_init': 1e-4,
# If > 0 then increase the lr linearly from warmup_init to lr each iter for until iters
'lr_warmup_until': 500,
# The terms to scale the respective loss by
'conf_alpha': 1,
'bbox_alpha': 1.5,
'mask_alpha': 0.4 / 256 * 140 * 140, # Some funky equation. Don't worry about it.
# Eval.py sets this if you just want to run YOLACT as a detector
'eval_mask_branch': True,
# Top_k examples to consider for NMS
'nms_top_k': 200,
# Examples with confidence less than this are not considered by NMS
'nms_conf_thresh': 0.05,
# Boxes with IoU overlap greater than this threshold will be culled during NMS
'nms_thresh': 0.5,
# See mask_type for details.
'mask_type': mask_type.direct,
'mask_size': 16,
'masks_to_train': 100,
'mask_proto_src': None,
'mask_proto_net': [(256, 3, {}), (256, 3, {})],
'mask_proto_bias': False,
'mask_proto_prototype_activation': activation_func.relu,
'mask_proto_mask_activation': activation_func.sigmoid,
'mask_proto_coeff_activation': activation_func.tanh,
'mask_proto_crop': True,
'mask_proto_crop_expand': 0,
'mask_proto_loss': None,
'mask_proto_binarize_downsampled_gt': True,
'mask_proto_normalize_mask_loss_by_sqrt_area': False,
'mask_proto_reweight_mask_loss': False,
'mask_proto_grid_file': 'data/grid.npy',
'mask_proto_use_grid': False,
'mask_proto_coeff_gate': False,
'mask_proto_prototypes_as_features': False,
'mask_proto_prototypes_as_features_no_grad': False,
'mask_proto_remove_empty_masks': False,
'mask_proto_reweight_coeff': 1,
'mask_proto_coeff_diversity_loss': False,
'mask_proto_coeff_diversity_alpha': 1,
'mask_proto_normalize_emulate_roi_pooling': False,
'mask_proto_double_loss': False,
'mask_proto_double_loss_alpha': 1,
'mask_proto_split_prototypes_by_head': False,
'mask_proto_crop_with_pred_box': False,
# SSD data augmentation parameters
# Randomize hue, vibrance, etc.
'augment_photometric_distort': True,
# Have a chance to scale down the image and pad (to emulate smaller detections)
'augment_expand': True,
# Potentially sample a random crop from the image and put it in a random place
'augment_random_sample_crop': True,
# Mirror the image with a probability of 1/2
'augment_random_mirror': True,
# Flip the image vertically with a probability of 1/2
'augment_random_flip': False,
# With uniform probability, rotate the image [0,90,180,270] degrees
'augment_random_rot90': False,
# Discard detections with width and height smaller than this (in absolute width and height)
'discard_box_width': 4 / 550,
'discard_box_height': 4 / 550,
# If using batchnorm anywhere in the backbone, freeze the batchnorm layer during training.
# Note: any additional batch norm layers after the backbone will not be frozen.
'freeze_bn': False,
# Set this to a config object if you want an FPN (inherit from fpn_base). See fpn_base for details.
'fpn': None,
# Use the same weights for each network head
'share_prediction_module': False,
# For hard negative mining, instead of using the negatives that are least confidently background,
# use negatives that are most confidently not background.
'ohem_use_most_confident': False,
# Use focal loss as described in https://arxiv.org/pdf/1708.02002.pdf instead of OHEM
'use_focal_loss': False,
'focal_loss_alpha': 0.25,
'focal_loss_gamma': 2,
# The initial bias toward foreground objects, as specified in the focal loss paper
'focal_loss_init_pi': 0.01,
# Keeps track of the average number of examples for each class, and weights the loss for that class accordingly.
'use_class_balanced_conf': False,
# Whether to use sigmoid focal loss instead of softmax, all else being the same.
'use_sigmoid_focal_loss': False,
# Use class[0] to be the objectness score and class[1:] to be the softmax predicted class.
# Note: at the moment this is only implemented if use_focal_loss is on.
'use_objectness_score': False,
# Adds a global pool + fc layer to the smallest selected layer that predicts the existence of each of the 80 classes.
# This branch is only evaluated during training time and is just there for multitask learning.
'use_class_existence_loss': False,
'class_existence_alpha': 1,
# Adds a 1x1 convolution directly to the biggest selected layer that predicts a semantic segmentations for each of the 80 classes.
# This branch is only evaluated during training time and is just there for multitask learning.
'use_semantic_segmentation_loss': False,
'semantic_segmentation_alpha': 1,
# Adds another branch to the network to predict Mask IoU.
'use_mask_scoring': False,
'mask_scoring_alpha': 1,
# Match gt boxes using the Box2Pix change metric instead of the standard IoU metric.
# Note that the threshold you set for iou_threshold should be negative with this setting on.
'use_change_matching': False,
# Uses the same network format as mask_proto_net, except this time it's for adding extra head layers before the final
# prediction in prediction modules. If this is none, no extra layers will be added.
'extra_head_net': None,
# What params should the final head layers have (the ones that predict box, confidence, and mask coeffs)
'head_layer_params': {'kernel_size': 3, 'padding': 1},
# Add extra layers between the backbone and the network heads
# The order is (bbox, conf, mask)
'extra_layers': (0, 0, 0),
# During training, to match detections with gt, first compute the maximum gt IoU for each prior.
# Then, any of those priors whose maximum overlap is over the positive threshold, mark as positive.
# For any priors whose maximum is less than the negative iou threshold, mark them as negative.
# The rest are neutral and not used in calculating the loss.
'positive_iou_threshold': 0.5,
'negative_iou_threshold': 0.5,
# When using ohem, the ratio between positives and negatives (3 means 3 negatives to 1 positive)
'ohem_negpos_ratio': 3,
# If less than 1, anchors treated as a negative that have a crowd iou over this threshold with
# the crowd boxes will be treated as a neutral.
'crowd_iou_threshold': 1,
# This is filled in at runtime by Yolact's __init__, so don't touch it
'mask_dim': None,
# Input image size.
'max_size': 300,
# Whether or not to do post processing on the cpu at test time
'force_cpu_nms': True,
# Whether to use mask coefficient cosine similarity nms instead of bbox iou nms
'use_coeff_nms': False,
# Whether or not to have a separate branch whose sole purpose is to act as the coefficients for coeff_diversity_loss
# Remember to turn on coeff_diversity_loss, or these extra coefficients won't do anything!
# To see their effect, also remember to turn on use_coeff_nms.
'use_instance_coeff': False,
'num_instance_coeffs': 64,
# Whether or not to train the masks / boxes. If False, the corresponding loss is tied to 0.
'train_masks': True,
'train_boxes': True,
# If enabled, the gt masks will be cropped using the gt bboxes instead of the predicted ones.
# This speeds up training time considerably but results in much worse mAP at test time.
'use_gt_bboxes': False,
# Whether or not to preserve aspect ratio when resizing the image.
# If True, this will resize all images to be max_size^2 pixels in area while keeping aspect ratio.
# If False, all images are resized to max_size x max_size
'preserve_aspect_ratio': False,
# Whether or not to use the prediction module (c) from DSSD
'use_prediction_module': False,
# Whether or not to use the predicted coordinate scheme from Yolo v2
'use_yolo_regressors': False,
# For training, bboxes are considered "positive" if their anchors have a 0.5 IoU overlap
# or greater with a ground truth box. If this is true, instead of using the anchor boxes
# for this IoU computation, the matching function will use the predicted bbox coordinates.
# Don't turn this on if you're not using yolo regressors!
'use_prediction_matching': False,
# A list of settings to apply after the specified iteration. Each element of the list should look like
# (iteration, config_dict) where config_dict is a dictionary you'd pass into a config object's init.
'delayed_settings': [],
# Use command-line arguments to set this.
'no_jit': False,
'backbone': None,
'name': 'base_config',
# Fast Mask Re-scoring Network
# Inspired by Mask Scoring R-CNN (https://arxiv.org/abs/1903.00241)
# Instead of cropping the mask with the bbox, slide a convnet over the image-size mask,
# then use global pooling to get the final mask score
'use_maskiou': False,
# Architecture for the mask IoU network. A (num_classes-1, 1, {}) layer is appended to the end.
'maskiou_net': [],
# Discard predicted masks whose area is less than this
'discard_mask_area': -1,
'maskiou_alpha': 1.0,
'rescore_mask': False,
'rescore_bbox': False,
'maskious_to_train': -1,
})
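# Illustrative sketch (not used anywhere in this file): how the positive/negative IoU
# thresholds above partition priors during matching, assuming `overlaps` is the
# [num_priors x num_gt] IoU matrix computed by the matcher:
#   best_gt_overlap, _ = overlaps.max(dim=1)                   # best gt IoU for each prior
#   positive = best_gt_overlap >= cfg.positive_iou_threshold    # contributes to box/mask loss
#   negative = best_gt_overlap <  cfg.negative_iou_threshold    # background candidate
#   neutral  = ~(positive | negative)                           # ignored by the loss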
# ----------------------- YOLACT v1.0 CONFIGS ----------------------- #
yolact_base_config = coco_base_config.copy({
'name': 'yolact_base',
# Dataset stuff
'dataset': coco2017_dataset,
'num_classes': len(coco2017_dataset.class_names) + 1,
# Image Size
'max_size': 550,
# Training params
'lr_steps': (280000, 600000, 700000, 750000),
'max_iter': 800000,
# Backbone Settings
'backbone': resnet101_backbone.copy({
'selected_layers': list(range(1, 4)),
'use_pixel_scales': True,
'preapply_sqrt': False,
'use_square_anchors': True, # This is for backward compatibility with a bug
'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5,
'pred_scales': [[24], [48], [96], [192], [384]],
}),
# FPN Settings
'fpn': fpn_base.copy({
'use_conv_downsample': True,
'num_downsample': 2,
}),
# Mask Settings
'mask_type': mask_type.lincomb,
'mask_alpha': 6.125,
'mask_proto_src': 0,
'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
'mask_proto_normalize_emulate_roi_pooling': True,
# Other stuff
'share_prediction_module': True,
'extra_head_net': [(256, 3, {'padding': 1})],
'positive_iou_threshold': 0.5,
'negative_iou_threshold': 0.4,
'crowd_iou_threshold': 0.7,
'use_semantic_segmentation_loss': True,
})
yolact_im400_config = yolact_base_config.copy({
'name': 'yolact_im400',
'max_size': 400,
'backbone': yolact_base_config.backbone.copy({
'pred_scales': [[int(x[0] / yolact_base_config.max_size * 400)] for x in yolact_base_config.backbone.pred_scales],
}),
})
yolact_im700_config = yolact_base_config.copy({
'name': 'yolact_im700',
'masks_to_train': 300,
'max_size': 700,
'backbone': yolact_base_config.backbone.copy({
'pred_scales': [[int(x[0] / yolact_base_config.max_size * 700)] for x in yolact_base_config.backbone.pred_scales],
}),
})
yolact_darknet53_config = yolact_base_config.copy({
'name': 'yolact_darknet53',
'backbone': darknet53_backbone.copy({
'selected_layers': list(range(2, 5)),
'pred_scales': yolact_base_config.backbone.pred_scales,
'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios,
'use_pixel_scales': True,
'preapply_sqrt': False,
'use_square_anchors': True, # This is for backward compatibility with a bug
}),
})
yolact_resnet50_config = yolact_base_config.copy({
'name': 'yolact_resnet50',
'backbone': resnet50_backbone.copy({
'selected_layers': list(range(1, 4)),
'pred_scales': yolact_base_config.backbone.pred_scales,
'pred_aspect_ratios': yolact_base_config.backbone.pred_aspect_ratios,
'use_pixel_scales': True,
'preapply_sqrt': False,
'use_square_anchors': True, # This is for backward compatibility with a bug
}),
})
yolact_resnet50_pascal_config = yolact_resnet50_config.copy({
'name': None, # Will default to yolact_resnet50_pascal
# Dataset stuff
'dataset': pascal_sbd_dataset,
'num_classes': len(pascal_sbd_dataset.class_names) + 1,
'max_iter': 120000,
'lr_steps': (60000, 100000),
'backbone': yolact_resnet50_config.backbone.copy({
'pred_scales': [[32], [64], [128], [256], [512]],
'use_square_anchors': False,
})
})
# ----------------------- YOLACT++ CONFIGS ----------------------- #
yolact_plus_base_config = yolact_base_config.copy({
'name': 'yolact_plus_base',
'backbone': resnet101_dcn_inter3_backbone.copy({
'selected_layers': list(range(1, 4)),
'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5,
'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
'use_pixel_scales': True,
'preapply_sqrt': False,
'use_square_anchors': False,
}),
'use_maskiou': True,
'maskiou_net': [(8, 3, {'stride': 2}), (16, 3, {'stride': 2}), (32, 3, {'stride': 2}), (64, 3, {'stride': 2}), (128, 3, {'stride': 2})],
'maskiou_alpha': 25,
'rescore_bbox': False,
'rescore_mask': True,
'discard_mask_area': 5*5,
})
yolact_plus_resnet50_config = yolact_plus_base_config.copy({
'name': 'yolact_plus_resnet50',
'backbone': resnet50_dcnv2_backbone.copy({
'selected_layers': list(range(1, 4)),
'pred_aspect_ratios': [ [[1, 1/2, 2]] ]*5,
'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
'use_pixel_scales': True,
'preapply_sqrt': False,
'use_square_anchors': False,
}),
})
# Default config
cfg = yolact_base_config.copy()
def set_cfg(config_name:str):
""" Sets the active config. Works even if cfg is already imported! """
global cfg
# Note this is not just an eval because I'm lazy, but also because it can
# be used like ssd300_config.copy({'max_size': 400}) for extreme fine-tuning
cfg.replace(eval(config_name))
if cfg.name is None:
cfg.name = config_name.split('_config')[0]
def set_dataset(dataset_name:str):
""" Sets the dataset of the current config. """
cfg.dataset = eval(dataset_name)
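# Minimal usage sketch (illustrative comments only; the name passed to set_cfg is eval'd in
# this module, so any *_config or dataset object defined above can be referenced by name):
#   set_cfg('yolact_resnet50_config')   # switch the active config
#   set_dataset('coco2017_dataset')     # optionally override its dataset
#   print(cfg.name, cfg.max_size)       # -> yolact_resnet50 550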
================================================
FILE: data/scripts/COCO.sh
================================================
#!/bin/bash
start=`date +%s`
# handle optional download dir
if [ -z "$1" ]
then
# navigate to ./data
echo "navigating to ./data/ ..."
mkdir -p ./data
cd ./data/
mkdir -p ./coco
cd ./coco
mkdir -p ./images
mkdir -p ./annotations
else
# check if specified dir is valid
if [ ! -d $1 ]; then
echo $1 " is not a valid directory"
exit 0
fi
echo "navigating to " $1 " ..."
cd $1
fi
if [ ! -d images ]
then
mkdir -p ./images
fi
# Download the image data.
cd ./images
echo "Downloading MSCOCO train images ..."
curl -LO http://images.cocodataset.org/zips/train2017.zip
echo "Downloading MSCOCO val images ..."
curl -LO http://images.cocodataset.org/zips/val2017.zip
cd ../
if [ ! -d annotations ]
then
mkdir -p ./annotations
fi
# Download the annotation data.
cd ./annotations
echo "Downloading MSCOCO train/val annotations ..."
curl -LO http://images.cocodataset.org/annotations/annotations_trainval2014.zip
curl -LO http://images.cocodataset.org/annotations/annotations_trainval2017.zip
echo "Finished downloading. Now extracting ..."
# Unzip data
echo "Extracting train images ..."
unzip -qqjd ../images ../images/train2017.zip
echo "Extracting val images ..."
unzip -qqjd ../images ../images/val2017.zip
echo "Extracting annotations ..."
unzip -qqd .. ./annotations_trainval2014.zip
unzip -qqd .. ./annotations_trainval2017.zip
echo "Removing zip files ..."
rm ../images/train2017.zip
rm ../images/val2017.zip
rm ./annotations_trainval2014.zip
rm ./annotations_trainval2017.zip
end=`date +%s`
runtime=$((end-start))
echo "Completed in " $runtime " seconds"
================================================
FILE: data/scripts/COCO_test.sh
================================================
#!/bin/bash
start=`date +%s`
# handle optional download dir
if [ -z "$1" ]
then
# navigate to ./data
echo "navigating to ./data/ ..."
mkdir -p ./data
cd ./data/
mkdir -p ./coco
cd ./coco
mkdir -p ./images
mkdir -p ./annotations
else
# check if specified dir is valid
if [ ! -d $1 ]; then
echo $1 " is not a valid directory"
exit 0
fi
echo "navigating to " $1 " ..."
cd $1
fi
if [ ! -d images ]
then
mkdir -p ./images
fi
# Download the image data.
cd ./images
echo "Downloading MSCOCO test images ..."
curl -LO http://images.cocodataset.org/zips/test2017.zip
cd ../
if [ ! -d annotations ]
then
mkdir -p ./annotations
fi
# Download the annotation data.
cd ./annotations
echo "Downloading MSCOCO test info ..."
curl -LO http://images.cocodataset.org/annotations/image_info_test2017.zip
echo "Finished downloading. Now extracting ..."
# Unzip data
echo "Extracting train images ..."
unzip -qqjd ../images ../images/test2017.zip
echo "Extracting info ..."
unzip -qqd .. ./image_info_test2017.zip
echo "Removing zip files ..."
rm ../images/test2017.zip
rm ./image_info_test2017.zip
end=`date +%s`
runtime=$((end-start))
echo "Completed in " $runtime " seconds"
================================================
FILE: data/scripts/mix_sets.py
================================================
import json
import os
import sys
from collections import defaultdict
usage_text = """
This script creates a coco annotation file by mixing one or more existing annotation files.
Usage: python data/scripts/mix_sets.py output_name [set1 range1 [set2 range2 [...]]]
To use, specify the output annotation name and any number of set + range pairs, where the sets
are in the form instances_<set_name>.json and ranges are python-evalable ranges. The resulting
json will be spit out as instances_<output_name>.json in the same folder as the input sets.
For instance,
python data/scripts/mix_sets.py trainval35k train2014 : val2014 :-5000
This will create an instances_trainval35k.json file with all images and corresponding annotations
from train2014 and the first 35000 images from val2014.
You can also specify only one set:
python data/scripts/mix_sets.py minival5k val2014 -5000:
This will take the last 5k images from val2014 and put it in instances_minival5k.json.
"""
annotations_path = 'data/coco/annotations/instances_%s.json'
fields_to_combine = ('images', 'annotations')
fields_to_steal = ('info', 'categories', 'licenses')
if __name__ == '__main__':
if len(sys.argv) < 4 or len(sys.argv) % 2 != 0:
print(usage_text)
exit()
out_name = sys.argv[1]
sets = sys.argv[2:]
sets = [(sets[2*i], sets[2*i+1]) for i in range(len(sets)//2)]
out = {x: [] for x in fields_to_combine}
for idx, (set_name, range_str) in enumerate(sets):
print('Loading set %s...' % set_name)
with open(annotations_path % set_name, 'r') as f:
set_json = json.load(f)
# "Steal" some fields that don't need to be combined from the first set
if idx == 0:
for field in fields_to_steal:
out[field] = set_json[field]
print('Building image index...')
image_idx = {x['id']: x for x in set_json['images']}
print('Collecting annotations...')
anns_idx = defaultdict(lambda: [])
for ann in set_json['annotations']:
anns_idx[ann['image_id']].append(ann)
export_ids = list(image_idx.keys())
export_ids.sort()
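# The range string is eval'd as a plain Python slice on the sorted id list, so e.g.
# range_str=':-5000' yields export_ids[:-5000] and '-5000:' yields export_ids[-5000:]
# (matching the examples in usage_text above).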
export_ids = eval('export_ids[%s]' % range_str, {}, {'export_ids': export_ids})
print('Adding %d images...' % len(export_ids))
for _id in export_ids:
out['images'].append(image_idx[_id])
out['annotations'] += anns_idx[_id]
print('Done.\n')
print('Saving result...')
with open(annotations_path % (out_name), 'w') as out_file:
json.dump(out, out_file)
================================================
FILE: environment.yml
================================================
# Installs dependencies for YOLACT managed by Anaconda.
# The advantage is that you get working CUDA+cuDNN+pytorch+torchvision versions.
#
# TODO: you must additionally install NVIDIA drivers, e.g. on Ubuntu Linux
# `apt install nvidia-driver-440` (change the 440 for whatever version you need/have).
#
name: yolact-env
#prefix: /your/custom/path/envs/yolact-env
channels:
- conda-forge
- pytorch
- defaults
dependencies:
- python==3.7
- pip
- cython
- pytorch::torchvision
- pytorch::pytorch >=1.0.1
- cudatoolkit
- cudnn
- pytorch::cuda100
- matplotlib
- git # to download COCO dataset
- curl # to download COCO dataset
- unzip # to download COCO dataset
- conda-forge::bash # to download COCO dataset
- pip:
- opencv-python
- pillow <7.0 # bug PILLOW_VERSION in torchvision, must be < 7.0 until torchvision is upgraded
- pycocotools
- PyQt5 # needed on KDE/Qt envs for matplotlib
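# Example usage (standard conda workflow; adjust the env name if you change it above):
#   conda env create -f environment.yml
#   conda activate yolact-env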
================================================
FILE: eval.py
================================================
from data import COCODetection, get_label_map, MEANS, COLORS
from yolact import Yolact
from utils.augmentations import BaseTransform, FastBaseTransform, Resize
from utils.functions import MovingAverage, ProgressBar
from layers.box_utils import jaccard, center_size, mask_iou
from utils import timer
from utils.functions import SavePath
from layers.output_utils import postprocess, undo_image_transformation
import pycocotools
from data import cfg, set_cfg, set_dataset
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import argparse
import time
import random
import cProfile
import pickle
import json
import os
from collections import defaultdict
from pathlib import Path
from collections import OrderedDict
from PIL import Image
import matplotlib.pyplot as plt
import cv2
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args(argv=None):
parser = argparse.ArgumentParser(
description='YOLACT COCO Evaluation')
parser.add_argument('--trained_model',
default='weights/ssd300_mAP_77.43_v2.pth', type=str,
help='Trained state_dict file path to open. If "interrupt", this will open the interrupt file.')
parser.add_argument('--top_k', default=5, type=int,
help='Further restrict the number of predictions to parse')
parser.add_argument('--cuda', default=True, type=str2bool,
help='Use cuda to evaluate model')
parser.add_argument('--fast_nms', default=False, type=str2bool,
help='Whether to use a faster, but not entirely correct version of NMS.')
parser.add_argument('--cluster_nms', default=True, type=str2bool,
help='Whether to use a fast and correct version of NMS.')
parser.add_argument('--cluster_diounms', default=True, type=str2bool,
help='Whether to use a fast and correct version of DIoU-NMS.')
parser.add_argument('--spm', default=True, type=str2bool,
help='Whether to use a score penalty mechanism for cluster NMS.')
parser.add_argument('--spm_dist', default=True, type=str2bool,
help='Whether to use a score penalty mechanism + distance for cluster NMS.')
parser.add_argument('--spm_dist_weighted', default=True, type=str2bool,
help='Whether to use a score penalty mechanism + distance + weighted coordinates for cluster NMS.')
parser.add_argument('--cross_class_nms', default=False, type=str2bool,
help='Whether to compute NMS cross-class or per-class. It supports the above NMS strategies.')
parser.add_argument('--display_masks', default=True, type=str2bool,
help='Whether or not to display masks over bounding boxes')
parser.add_argument('--display_bboxes', default=True, type=str2bool,
help='Whether or not to display bboxes around masks')
parser.add_argument('--display_text', default=True, type=str2bool,
help='Whether or not to display text (class [score])')
parser.add_argument('--display_scores', default=True, type=str2bool,
help='Whether or not to display scores in addition to classes')
parser.add_argument('--display', dest='display', action='store_true',
help='Display qualitative results instead of quantitative ones.')
parser.add_argument('--shuffle', dest='shuffle', action='store_true',
help='Shuffles the images when displaying them. Doesn\'t have much of an effect when display is off though.')
parser.add_argument('--ap_data_file', default='results/ap_data.pkl', type=str,
help='In quantitative mode, the file to save detections before calculating mAP.')
parser.add_argument('--resume', dest='resume', action='store_true',
help='If display not set, this resumes mAP calculations from the ap_data_file.')
parser.add_argument('--max_images', default=-1, type=int,
help='The maximum number of images from the dataset to consider. Use -1 for all.')
parser.add_argument('--output_coco_json', dest='output_coco_json', action='store_true',
help='If display is not set, instead of processing IoU values, this just dumps detections into the coco json file.')
parser.add_argument('--bbox_det_file', default='results/bbox_detections.json', type=str,
help='The output file for coco bbox results if --coco_results is set.')
parser.add_argument('--mask_det_file', default='results/mask_detections.json', type=str,
help='The output file for coco mask results if --coco_results is set.')
parser.add_argument('--config', default=None,
help='The config object to use.')
parser.add_argument('--output_web_json', dest='output_web_json', action='store_true',
help='If display is not set, instead of processing IoU values, this dumps detections for usage with the detections viewer web thingy.')
parser.add_argument('--web_det_path', default='web/dets/', type=str,
help='If output_web_json is set, this is the path to dump detections into.')
parser.add_argument('--no_bar', dest='no_bar', action='store_true',
help='Do not output the status bar. This is useful for when piping to a file.')
parser.add_argument('--display_lincomb', default=False, type=str2bool,
help='If the config uses lincomb masks, output a visualization of how those masks are created.')
parser.add_argument('--benchmark', default=False, dest='benchmark', action='store_true',
help='Equivalent to running display mode but without displaying an image.')
parser.add_argument('--no_sort', default=False, dest='no_sort', action='store_true',
help='Do not sort images by hashed image ID.')
parser.add_argument('--seed', default=None, type=int,
help='The seed to pass into random.seed. Note: this is only really for the shuffle and does not (I think) affect cuda stuff.')
parser.add_argument('--mask_proto_debug', default=False, dest='mask_proto_debug', action='store_true',
help='Outputs stuff for scripts/compute_masks.py.')
parser.add_argument('--no_crop', default=False, dest='crop', action='store_false',
help='Do not crop output masks with the predicted bounding box.')
parser.add_argument('--image', default=None, type=str,
help='A path to an image to use for display.')
parser.add_argument('--images', default=None, type=str,
help='An input folder of images and output folder to save detected images. Should be in the format input->output.')
parser.add_argument('--video', default=None, type=str,
help='A path to a video to evaluate on. Passing in a number will use that index webcam.')
parser.add_argument('--video_multiframe', default=1, type=int,
help='The number of frames to evaluate in parallel to make videos play at higher fps.')
parser.add_argument('--score_threshold', default=0, type=float,
help='Detections with a score under this threshold will not be considered. This currently only works in display mode.')
parser.add_argument('--dataset', default=None, type=str,
help='If specified, override the dataset specified in the config with this one (example: coco2017_dataset).')
parser.add_argument('--detect', default=False, dest='detect', action='store_true',
help='Don\'t evaluate the mask branch at all and only do object detection. This only works for --display and --benchmark.')
parser.add_argument('--display_fps', default=False, dest='display_fps', action='store_true',
help='When displaying / saving video, draw the FPS on the frame')
parser.add_argument('--emulate_playback', default=False, dest='emulate_playback', action='store_true',
help='When saving a video, emulate the framerate that you\'d get running in real-time mode.')
parser.set_defaults(no_bar=False, display=False, resume=False, output_coco_json=False, output_web_json=False, shuffle=False,
benchmark=False, no_sort=False, no_hash=False, mask_proto_debug=False, crop=True, detect=False, display_fps=False,
emulate_playback=False)
global args
args = parser.parse_args(argv)
if args.output_web_json:
args.output_coco_json = True
if args.seed is not None:
random.seed(args.seed)
iou_thresholds = [x / 100 for x in range(50, 100, 5)]
coco_cats = {} # Call prep_coco_cats to fill this
coco_cats_inv = {}
color_cache = defaultdict(lambda: {})
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
"""
Note: If undo_transform=False then im_h and im_w are allowed to be None.
"""
if undo_transform:
img_numpy = undo_image_transformation(img, w, h)
img_gpu = torch.Tensor(img_numpy).cuda()
else:
img_gpu = img / 255.0
h, w, _ = img.shape
with timer.env('Postprocess'):
save = cfg.rescore_bbox
cfg.rescore_bbox = True
t = postprocess(dets_out, w, h, visualize_lincomb = args.display_lincomb,
crop_masks = args.crop,
score_threshold = args.score_threshold)
cfg.rescore_bbox = save
with timer.env('Copy'):
idx = t[1].argsort(0, descending=True)[:args.top_k]
if cfg.eval_mask_branch:
# Masks are drawn on the GPU, so don't copy
masks = t[3][idx]
classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
num_dets_to_consider = min(args.top_k, classes.shape[0])
for j in range(num_dets_to_consider):
if scores[j] < args.score_threshold:
num_dets_to_consider = j
break
# Quick and dirty lambda for selecting the color for a particular index
# Also keeps track of a per-gpu color cache for maximum speed
def get_color(j, on_gpu=None):
global color_cache
color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
if on_gpu is not None and color_idx in color_cache[on_gpu]:
return color_cache[on_gpu][color_idx]
else:
color = COLORS[color_idx]
if not undo_transform:
# The image might come in as RGB or BGR, depending
color = (color[2], color[1], color[0])
if on_gpu is not None:
color = torch.Tensor(color).to(on_gpu).float() / 255.
color_cache[on_gpu][color_idx] = color
return color
# First, draw the masks on the GPU where we can do it really fast
# Beware: very fast but possibly unintelligible mask-drawing code ahead
# I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
# After this, mask is of size [num_dets, h, w, 1]
masks = masks[:num_dets_to_consider, :, :, None]
# Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)], dim=0)
masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
# This is 1 everywhere except for 1-mask_alpha where the mask is
inv_alph_masks = masks * (-mask_alpha) + 1
# I did the math for this on pen and paper. This whole block should be equivalent to:
# for j in range(num_dets_to_consider):
# img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
masks_color_summand = masks_color[0]
if num_dets_to_consider > 1:
inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider-1)].cumprod(dim=0)
masks_color_cumul = masks_color[1:] * inv_alph_cumul
masks_color_summand += masks_color_cumul.sum(dim=0)
img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand
if args.display_fps:
# Draw the box for the fps on the GPU
font_face = cv2.FONT_HERSHEY_DUPLEX
font_scale = 0.6
font_thickness = 1
text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
img_gpu[0:text_h+8, 0:text_w+8] *= 0.6 # 1 - Box alpha
# Then draw the stuff that needs to be done on the cpu
# Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
img_numpy = (img_gpu * 255).byte().cpu().numpy()
if args.display_fps:
# Draw the text on the CPU
text_pt = (4, text_h + 2)
text_color = [255, 255, 255]
cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
if num_dets_to_consider == 0:
return img_numpy
if args.display_text or args.display_bboxes:
for j in reversed(range(num_dets_to_consider)):
x1, y1, x2, y2 = boxes[j, :]
color = get_color(j)
score = scores[j]
if args.display_bboxes:
cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)
if args.display_text:
_class = cfg.dataset.class_names[classes[j]]
text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
font_face = cv2.FONT_HERSHEY_DUPLEX
font_scale = 0.6
font_thickness = 1
text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
text_pt = (x1, y1 - 3)
text_color = [255, 255, 255]
cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
return img_numpy
def prep_benchmark(dets_out, h, w):
with timer.env('Postprocess'):
t = postprocess(dets_out, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)
with timer.env('Copy'):
classes, scores, boxes, masks = [x[:args.top_k] for x in t]
if isinstance(scores, list):
box_scores = scores[0].cpu().numpy()
mask_scores = scores[1].cpu().numpy()
else:
scores = scores.cpu().numpy()
classes = classes.cpu().numpy()
boxes = boxes.cpu().numpy()
masks = masks.cpu().numpy()
with timer.env('Sync'):
# Just in case
torch.cuda.synchronize()
def prep_coco_cats():
""" Prepare inverted table for category id lookup given a coco cats object. """
for coco_cat_id, transformed_cat_id_p1 in get_label_map().items():
transformed_cat_id = transformed_cat_id_p1 - 1
coco_cats[transformed_cat_id] = coco_cat_id
coco_cats_inv[coco_cat_id] = transformed_cat_id
def get_coco_cat(transformed_cat_id):
""" transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """
return coco_cats[transformed_cat_id]
def get_transformed_cat(coco_cat_id):
""" transformed_cat_id is [0,80) as indices in cfg.dataset.class_names """
return coco_cats_inv[coco_cat_id]
class Detections:
def __init__(self):
self.bbox_data = []
self.mask_data = []
def add_bbox(self, image_id:int, category_id:int, bbox:list, score:float):
""" Note that bbox should be a list or tuple of (x1, y1, x2, y2) """
bbox = [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]]
# Round to the nearest 10th to avoid huge file sizes, as COCO suggests
bbox = [round(float(x)*10)/10 for x in bbox]
self.bbox_data.append({
'image_id': int(image_id),
'category_id': get_coco_cat(int(category_id)),
'bbox': bbox,
'score': float(score)
})
def add_mask(self, image_id:int, category_id:int, segmentation:np.ndarray, score:float):
""" The segmentation should be the full mask, the size of the image and with size [h, w]. """
rle = pycocotools.mask.encode(np.asfortranarray(segmentation.astype(np.uint8)))
rle['counts'] = rle['counts'].decode('ascii') # json.dump doesn't like bytes strings
self.mask_data.append({
'image_id': int(image_id),
'category_id': get_coco_cat(int(category_id)),
'segmentation': rle,
'score': float(score)
})
def dump(self):
dump_arguments = [
(self.bbox_data, args.bbox_det_file),
(self.mask_data, args.mask_det_file)
]
for data, path in dump_arguments:
with open(path, 'w') as f:
json.dump(data, f)
def dump_web(self):
""" Dumps it in the format for my web app. Warning: bad code ahead! """
config_outs = ['preserve_aspect_ratio', 'use_prediction_module',
'use_yolo_regressors', 'use_prediction_matching',
'train_masks']
output = {
'info' : {
'Config': {key: getattr(cfg, key) for key in config_outs},
}
}
image_ids = list(set([x['image_id'] for x in self.bbox_data]))
image_ids.sort()
image_lookup = {_id: idx for idx, _id in enumerate(image_ids)}
output['images'] = [{'image_id': image_id, 'dets': []} for image_id in image_ids]
# These should already be sorted by score with the way prep_metrics works.
for bbox, mask in zip(self.bbox_data, self.mask_data):
image_obj = output['images'][image_lookup[bbox['image_id']]]
image_obj['dets'].append({
'score': bbox['score'],
'bbox': bbox['bbox'],
'category': cfg.dataset.class_names[get_transformed_cat(bbox['category_id'])],
'mask': mask['segmentation'],
})
with open(os.path.join(args.web_det_path, '%s.json' % cfg.name), 'w') as f:
json.dump(output, f)
def _mask_iou(mask1, mask2, iscrowd=False):
with timer.env('Mask IoU'):
ret = mask_iou(mask1, mask2, iscrowd)
return ret.cpu()
def _bbox_iou(bbox1, bbox2, iscrowd=False):
with timer.env('BBox IoU'):
ret = jaccard(bbox1, bbox2, iscrowd)
return ret.cpu()
def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, detections:Detections=None):
""" Returns a list of APs for this image, with each element being for a class """
if not args.output_coco_json:
with timer.env('Prepare gt'):
gt_boxes = torch.Tensor(gt[:, :4])
gt_boxes[:, [0, 2]] *= w
gt_boxes[:, [1, 3]] *= h
gt_classes = list(gt[:, 4].astype(int))
gt_masks = torch.Tensor(gt_masks).view(-1, h*w)
if num_crowd > 0:
split = lambda x: (x[-num_crowd:], x[:-num_crowd])
crowd_boxes , gt_boxes = split(gt_boxes)
crowd_masks , gt_masks = split(gt_masks)
crowd_classes, gt_classes = split(gt_classes)
with timer.env('Postprocess'):
classes, scores, boxes, masks = postprocess(dets, w, h, crop_masks=args.crop, score_threshold=args.score_threshold)
if classes.size(0) == 0:
return
classes = list(classes.cpu().numpy().astype(int))
if isinstance(scores, list):
box_scores = list(scores[0].cpu().numpy().astype(float))
mask_scores = list(scores[1].cpu().numpy().astype(float))
else:
scores = list(scores.cpu().numpy().astype(float))
box_scores = scores
mask_scores = scores
masks = masks.view(-1, h*w).cuda()
boxes = boxes.cuda()
if args.output_coco_json:
with timer.env('JSON Output'):
boxes = boxes.cpu().numpy()
masks = masks.view(-1, h, w).cpu().numpy()
for i in range(masks.shape[0]):
# Make sure that the bounding box actually makes sense and a mask was produced
if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
detections.add_bbox(image_id, classes[i], boxes[i,:], box_scores[i])
detections.add_mask(image_id, classes[i], masks[i,:,:], mask_scores[i])
return
with timer.env('Eval Setup'):
num_pred = len(classes)
num_gt = len(gt_classes)
mask_iou_cache = _mask_iou(masks, gt_masks)
bbox_iou_cache = _bbox_iou(boxes.float(), gt_boxes.float())
if num_crowd > 0:
crowd_mask_iou_cache = _mask_iou(masks, crowd_masks, iscrowd=True)
crowd_bbox_iou_cache = _bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True)
else:
crowd_mask_iou_cache = None
crowd_bbox_iou_cache = None
box_indices = sorted(range(num_pred), key=lambda i: -box_scores[i])
mask_indices = sorted(box_indices, key=lambda i: -mask_scores[i])
iou_types = [
('box', lambda i,j: bbox_iou_cache[i, j].item(),
lambda i,j: crowd_bbox_iou_cache[i,j].item(),
lambda i: box_scores[i], box_indices),
('mask', lambda i,j: mask_iou_cache[i, j].item(),
lambda i,j: crowd_mask_iou_cache[i,j].item(),
lambda i: mask_scores[i], mask_indices)
]
timer.start('Main loop')
for _class in set(classes + gt_classes):
ap_per_iou = []
num_gt_for_class = sum([1 for x in gt_classes if x == _class])
for iouIdx in range(len(iou_thresholds)):
iou_threshold = iou_thresholds[iouIdx]
for iou_type, iou_func, crowd_func, score_func, indices in iou_types:
gt_used = [False] * len(gt_classes)
ap_obj = ap_data[iou_type][iouIdx][_class]
ap_obj.add_gt_positives(num_gt_for_class)
for i in indices:
if classes[i] != _class:
continue
max_iou_found = iou_threshold
max_match_idx = -1
for j in range(num_gt):
if gt_used[j] or gt_classes[j] != _class:
continue
iou = iou_func(i, j)
if iou > max_iou_found:
max_iou_found = iou
max_match_idx = j
if max_match_idx >= 0:
gt_used[max_match_idx] = True
ap_obj.push(score_func(i), True)
else:
# If the detection matches a crowd, we can just ignore it
matched_crowd = False
if num_crowd > 0:
for j in range(len(crowd_classes)):
if crowd_classes[j] != _class:
continue
iou = crowd_func(i, j)
if iou > iou_threshold:
matched_crowd = True
break
# All this crowd code so that we can make sure that our eval code gives the
# same result as COCOEval. There aren't even that many crowd annotations to
# begin with, but accuracy is of the utmost importance.
if not matched_crowd:
ap_obj.push(score_func(i), False)
timer.stop('Main loop')
class APDataObject:
"""
Stores all the information necessary to calculate the AP for one IoU and one class.
Note: I type annotated this because why not.
"""
def __init__(self):
self.data_points = []
self.num_gt_positives = 0
def push(self, score:float, is_true:bool):
self.data_points.append((score, is_true))
def add_gt_positives(self, num_positives:int):
""" Call this once per image. """
self.num_gt_positives += num_positives
def is_empty(self) -> bool:
return len(self.data_points) == 0 and self.num_gt_positives == 0
def get_ap(self) -> float:
""" Warning: result not cached. """
if self.num_gt_positives == 0:
return 0
# Sort descending by score
self.data_points.sort(key=lambda x: -x[0])
precisions = []
recalls = []
num_true = 0
num_false = 0
# Compute the precision-recall curve. The x axis is recalls and the y axis precisions.
for datum in self.data_points:
# datum[1] is whether the detection is a true or false positive
if datum[1]: num_true += 1
else: num_false += 1
precision = num_true / (num_true + num_false)
recall = num_true / self.num_gt_positives
precisions.append(precision)
recalls.append(recall)
# Smooth the curve by computing [max(precisions[i:]) for i in range(len(precisions))]
# Basically, remove any temporary dips from the curve.
# At least that's what I think, idk. COCOEval did it so I do too.
for i in range(len(precisions)-1, 0, -1):
if precisions[i] > precisions[i-1]:
precisions[i-1] = precisions[i]
# Compute the integral of precision(recall) d_recall from recall=0->1 using fixed-length riemann summation with 101 bars.
y_range = [0] * 101 # idx 0 is recall == 0.0 and idx 100 is recall == 1.00
x_range = np.array([x / 100 for x in range(101)])
recalls = np.array(recalls)
# I realize this is weird, but all it does is find the nearest precision(x) for a given x in x_range.
# Basically, if the closest recall we have to 0.01 is 0.009 this sets precision(0.01) = precision(0.009).
# I approximate the integral this way, because that's how COCOEval does it.
indices = np.searchsorted(recalls, x_range, side='left')
for bar_idx, precision_idx in enumerate(indices):
if precision_idx < len(precisions):
y_range[bar_idx] = precisions[precision_idx]
# Finally compute the riemann sum to get our integral.
# avg([precision(x) for x in 0:0.01:1])
return sum(y_range) / len(y_range)
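# In formula form, the value returned above is the 101-point interpolated AP:
# AP ~= (1/101) * sum_{k=0..100} p_interp(k/100), where p_interp(r) is the highest
# precision achieved at any recall >= r (and 0 if no detection reaches recall r).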
def badhash(x):
"""
Just a quick and dirty hash function for doing a deterministic shuffle based on image_id.
Source:
https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key
"""
x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
x = ((x >> 16) ^ x) & 0xFFFFFFFF
return x
def evalimage(net:Yolact, path:str, save_path:str=None):
frame = torch.from_numpy(cv2.imread(path)).cuda().float()
batch = FastBaseTransform()(frame.unsqueeze(0))
preds = net(batch)
img_numpy = prep_display(preds, frame, None, None, undo_transform=False)
if save_path is None:
img_numpy = img_numpy[:, :, (2, 1, 0)]
if save_path is None:
plt.imshow(img_numpy)
plt.title(path)
plt.show()
else:
cv2.imwrite(save_path, img_numpy)
def evalimages(net:Yolact, input_folder:str, output_folder:str):
if not os.path.exists(output_folder):
os.mkdir(output_folder)
print()
for p in Path(input_folder).glob('*'):
path = str(p)
name = os.path.basename(path)
name = '.'.join(name.split('.')[:-1]) + '.png'
out_path = os.path.join(output_folder, name)
evalimage(net, path, out_path)
print(path + ' -> ' + out_path)
print('Done.')
from multiprocessing.pool import ThreadPool
from queue import Queue
class CustomDataParallel(torch.nn.DataParallel):
""" A Custom Data Parallel class that properly gathers lists of dictionaries. """
def gather(self, outputs, output_device):
# Note that I don't actually want to convert everything to the output_device
return sum(outputs, [])
def evalvideo(net:Yolact, path:str, out_path:str=None):
# If the path is a digit, parse it as a webcam index
is_webcam = path.isdigit()
# If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
cudnn.benchmark = True
if is_webcam:
vid = cv2.VideoCapture(int(path))
else:
vid = cv2.VideoCapture(path)
if not vid.isOpened():
print('Could not open video "%s"' % path)
exit(-1)
target_fps = round(vid.get(cv2.CAP_PROP_FPS))
frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
if is_webcam:
num_frames = float('inf')
else:
num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
net = CustomDataParallel(net).cuda()
transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
frame_times = MovingAverage(100)
fps = 0
frame_time_target = 1 / target_fps
running = True
fps_str = ''
vid_done = False
frames_displayed = 0
if out_path is not None:
out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), target_fps, (frame_width, frame_height))
def cleanup_and_exit():
print()
pool.terminate()
vid.release()
if out_path is not None:
out.release()
cv2.destroyAllWindows()
exit()
def get_next_frame(vid):
frames = []
for idx in range(args.video_multiframe):
frame = vid.read()[1]
if frame is None:
return frames
frames.append(frame)
return frames
def transform_frame(frames):
with torch.no_grad():
frames = [torch.from_numpy(frame).cuda().float() for frame in frames]
return frames, transform(torch.stack(frames, 0))
def eval_network(inp):
with torch.no_grad():
frames, imgs = inp
num_extra = 0
while imgs.size(0) < args.video_multiframe:
imgs = torch.cat([imgs, imgs[0].unsqueeze(0)], dim=0)
num_extra += 1
out = net(imgs)
if num_extra > 0:
out = out[:-num_extra]
return frames, out
def prep_frame(inp, fps_str):
with torch.no_grad():
frame, preds = inp
return prep_display(preds, frame, None, None, undo_transform=False, class_color=True, fps_str=fps_str)
frame_buffer = Queue()
video_fps = 0
# All this timing code is to make sure that the video plays back at the right framerate
def play_video():
try:
nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done
video_frame_times = MovingAverage(100)
frame_time_stabilizer = frame_time_target
last_time = None
stabilizer_step = 0.0005
progress_bar = ProgressBar(30, num_frames)
while running:
frame_time_start = time.time()
if not frame_buffer.empty():
next_time = time.time()
if last_time is not None:
video_frame_times.add(next_time - last_time)
video_fps = 1 / video_frame_times.get_avg()
if out_path is None:
cv2.imshow(path, frame_buffer.get())
else:
out.write(frame_buffer.get())
frames_displayed += 1
last_time = next_time
if out_path is not None:
if video_frame_times.get_avg() == 0:
fps = 0
else:
fps = 1 / video_frame_times.get_avg()
progress = frames_displayed / num_frames * 100
progress_bar.set_val(frames_displayed)
print('\rProcessing Frames %s %6d / %6d (%5.2f%%) %5.2f fps '
% (repr(progress_bar), frames_displayed, num_frames, progress, fps), end='')
# This is split because you don't want savevideo to require cv2 display functionality (see #197)
if out_path is None and cv2.waitKey(1) == 27:
# Press Escape to close
running = False
if not (frames_displayed < num_frames):
running = False
if not vid_done:
buffer_size = frame_buffer.qsize()
if buffer_size < args.video_multiframe:
frame_time_stabilizer += stabilizer_step
elif buffer_size > args.video_multiframe:
frame_time_stabilizer -= stabilizer_step
if frame_time_stabilizer < 0:
frame_time_stabilizer = 0
new_target = frame_time_stabilizer if is_webcam else max(frame_time_stabilizer, frame_time_target)
else:
new_target = frame_time_target
next_frame_target = max(2 * new_target - video_frame_times.get_avg(), 0)
target_time = frame_time_start + next_frame_target - 0.001 # Let's just subtract a millisecond to be safe
if out_path is None or args.emulate_playback:
# This gives more accurate timing than if sleeping the whole amount at once
while time.time() < target_time:
time.sleep(0.001)
else:
# Let's not starve the main thread, now
time.sleep(0.001)
except:
# See issue #197 for why this is necessary
import traceback
traceback.print_exc()
extract_frame = lambda x, i: (x[0][i] if x[1][i]['detection'] is None else x[0][i].to(x[1][i]['detection']['box'].device), [x[1][i]])
# Prime the network on the first frame because I do some thread unsafe things otherwise
print('Initializing model... ', end='')
first_batch = eval_network(transform_frame(get_next_frame(vid)))
print('Done.')
# For each frame the sequence of functions it needs to go through to be processed (in reversed order)
sequence = [prep_frame, eval_network, transform_frame]
pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
pool.apply_async(play_video)
active_frames = [{'value': extract_frame(first_batch, i), 'idx': 0} for i in range(len(first_batch[0]))]
print()
if out_path is None: print('Press Escape to close.')
try:
while vid.isOpened() and running:
# Hard limit on frames in buffer so we don't run out of memory >.>
while frame_buffer.qsize() > 100:
time.sleep(0.001)
start_time = time.time()
# Start loading the next frames from the disk
if not vid_done:
next_frames = pool.apply_async(get_next_frame, args=(vid,))
else:
next_frames = None
if not (vid_done and len(active_frames) == 0):
# For each frame in our active processing queue, dispatch a job
# for that frame using the current function in the sequence
for frame in active_frames:
_args = [frame['value']]
if frame['idx'] == 0:
_args.append(fps_str)
frame['value'] = pool.apply_async(sequence[frame['idx']], args=_args)
# For each frame whose job was the last in the sequence (i.e. for all final outputs)
for frame in active_frames:
if frame['idx'] == 0:
frame_buffer.put(frame['value'].get())
# Remove the finished frames from the processing queue
active_frames = [x for x in active_frames if x['idx'] > 0]
# Finish evaluating every frame in the processing queue and advance their position in the sequence
for frame in list(reversed(active_frames)):
frame['value'] = frame['value'].get()
frame['idx'] -= 1
if frame['idx'] == 0:
# Split this up into individual threads for prep_frame since it doesn't support batch size
active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, len(frame['value'][0]))]
frame['value'] = extract_frame(frame['value'], 0)
# Finish loading in the next frames and add them to the processing queue
if next_frames is not None:
frames = next_frames.get()
if len(frames) == 0:
vid_done = True
else:
active_frames.append({'value': frames, 'idx': len(sequence)-1})
# Compute FPS
frame_times.add(time.time() - start_time)
fps = args.video_multiframe / frame_times.get_avg()
else:
fps = 0
fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (fps, video_fps, frame_buffer.qsize())
if not args.display_fps:
print('\r' + fps_str + ' ', end='')
except KeyboardInterrupt:
print('\nStopping...')
cleanup_and_exit()
def evaluate(net:Yolact, dataset, train_mode=False):
net.detect.use_fast_nms = args.fast_nms
net.detect.use_cluster_nms = args.cluster_nms
net.detect.use_cluster_diounms = args.cluster_diounms
net.detect.use_spm_nms = args.spm
net.detect.use_spm_dist_nms = args.spm_dist
net.detect.use_spm_dist_weighted_nms = args.spm_dist_weighted
net.detect.use_cross_class_nms = args.cross_class_nms
cfg.mask_proto_debug = args.mask_proto_debug
# TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
if args.image is not None:
if ':' in args.image:
inp, out = args.image.split(':')
evalimage(net, inp, out)
else:
evalimage(net, args.image)
return
elif args.images is not None:
inp, out = args.images.split(':')
evalimages(net, inp, out)
return
elif args.video is not None:
if ':' in args.video:
inp, out = args.video.split(':')
evalvideo(net, inp, out)
else:
evalvideo(net, args.video)
return
frame_times = MovingAverage()
dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
progress_bar = ProgressBar(30, dataset_size)
print()
if not args.display and not args.benchmark:
# For each class and iou, stores tuples (score, isPositive)
# Index ap_data[type][iouIdx][classIdx]
ap_data = {
'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]
}
detections = Detections()
else:
timer.disable('Load Data')
dataset_indices = list(range(len(dataset)))
if args.shuffle:
random.shuffle(dataset_indices)
elif not args.no_sort:
# Do a deterministic shuffle based on the image ids
#
# I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
# the order of insertion. That means on python 3.6, the images come in the order they are in
# in the annotations file. For some reason, the first images in the annotations file are
# the hardest. To combat this, I use a hard-coded hash function based on the image ids
# to shuffle the indices we use. That way, no matter what python version or how pycocotools
# handles the data, we get the same result every time.
hashed = [badhash(x) for x in dataset.ids]
dataset_indices.sort(key=lambda x: hashed[x])
dataset_indices = dataset_indices[:dataset_size]
try:
# Main eval loop
for it, image_idx in enumerate(dataset_indices):
timer.reset()
with timer.env('Load Data'):
img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)
# Test flag, do not upvote
if cfg.mask_proto_debug:
with open('scripts/info.txt', 'w') as f:
f.write(str(dataset.ids[image_idx]))
np.save('scripts/gt.npy', gt_masks)
batch = Variable(img.unsqueeze(0))
if args.cuda:
batch = batch.cuda()
with timer.env('Network Extra'):
preds = net(batch)
# Perform the meat of the operation here depending on our mode.
if args.display:
img_numpy = prep_display(preds, img, h, w)
elif args.benchmark:
prep_benchmark(preds, h, w)
else:
prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections)
# First couple of images take longer because we're constructing the graph.
# Since that's technically initialization, don't include those in the FPS calculations.
if it > 1:
frame_times.add(timer.total_time())
if args.display:
if it > 1:
print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
plt.imshow(img_numpy)
plt.title(str(dataset.ids[image_idx]))
plt.show()
elif not args.no_bar:
if it > 1: fps = 1 / frame_times.get_avg()
else: fps = 0
progress = (it+1) / dataset_size * 100
progress_bar.set_val(it+1)
print('\rProcessing Images %s %6d / %6d (%5.2f%%) %5.2f fps '
% (repr(progress_bar), it+1, dataset_size, progress, fps), end='')
if not args.display and not args.benchmark:
print()
if args.output_coco_json:
print('Dumping detections...')
if args.output_web_json:
detections.dump_web()
else:
detections.dump()
else:
if not train_mode:
print('Saving data...')
with open(args.ap_data_file, 'wb') as f:
pickle.dump(ap_data, f)
return calc_map(ap_data)
elif args.benchmark:
print()
print()
print('Stats for the last frame:')
timer.print_stats()
avg_seconds = frame_times.get_avg()
print('Average: %5.2f fps, %5.2f ms' % (1 / frame_times.get_avg(), 1000*avg_seconds))
except KeyboardInterrupt:
print('Stopping...')
def calc_map(ap_data):
print('Calculating mAP...')
aps = [{'box': [], 'mask': []} for _ in iou_thresholds]
for _class in range(len(cfg.dataset.class_names)):
for iou_idx in range(len(iou_thresholds)):
for iou_type in ('box', 'mask'):
ap_obj = ap_data[iou_type][iou_idx][_class]
if not ap_obj.is_empty():
aps[iou_idx][iou_type].append(ap_obj.get_ap())
all_maps = {'box': OrderedDict(), 'mask': OrderedDict()}
# Looking back at it, this code is really hard to read :/
for iou_type in ('box', 'mask'):
all_maps[iou_type]['all'] = 0 # Make this first in the ordereddict
for i, threshold in enumerate(iou_thresholds):
mAP = sum(aps[i][iou_type]) / len(aps[i][iou_type]) * 100 if len(aps[i][iou_type]) > 0 else 0
all_maps[iou_type][int(threshold*100)] = mAP
all_maps[iou_type]['all'] = (sum(all_maps[iou_type].values()) / (len(all_maps[iou_type].values())-1))
print_maps(all_maps)
# Put in a prettier format so we can serialize it to json during training
all_maps = {k: {j: round(u, 2) for j, u in v.items()} for k, v in all_maps.items()}
return all_maps
def print_maps(all_maps):
# Warning: hacky
make_row = lambda vals: (' %5s |' * len(vals)) % tuple(vals)
make_sep = lambda n: ('-------+' * n)
print()
print(make_row([''] + [('.%d ' % x if isinstance(x, int) else x + ' ') for x in all_maps['box'].keys()]))
print(make_sep(len(all_maps['box']) + 1))
for iou_type in ('box', 'mask'):
print(make_row([iou_type] + ['%.2f' % x if x < 100 else '%.1f' % x for x in all_maps[iou_type].values()]))
print(make_sep(len(all_maps['box']) + 1))
print()
if __name__ == '__main__':
parse_args()
if args.config is not None:
set_cfg(args.config)
if args.trained_model == 'interrupt':
args.trained_model = SavePath.get_interrupt('weights/')
elif args.trained_model == 'latest':
args.trained_model = SavePath.get_latest('weights/', cfg.name)
if args.config is None:
model_path = SavePath.from_str(args.trained_model)
# TODO: Bad practice? Probably want to do a name lookup instead.
args.config = model_path.model_name + '_config'
print('Config not specified. Parsed %s from the file name.\n' % args.config)
set_cfg(args.config)
num_count = 0
if args.cross_class_nms:
nms = 'cross class'
else:
nms = 'not use cross class'
if args.fast_nms:
num_count = num_count + 1
if args.cluster_nms:
num_count = num_count + 1
if args.cluster_diounms:
num_count = num_count + 1
if args.spm:
num_count = num_count + 1
if args.spm_dist:
num_count = num_count + 1
if args.spm_dist_weighted:
num_count = num_count + 1
if num_count > 1:
raise Exception("You must choose only one NMS strategy. Options: fast_nms, cluster_nms, cluster_diounms, spm, spm_dist, spm_dist_weighted.")
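# Example (hypothetical weight path): enable exactly one strategy from the command line, e.g.
#   python eval.py --trained_model=weights/yolact_base.pth --cluster_nms=True \
#       --cluster_diounms=False --spm=False --spm_dist=False --spm_dist_weighted=False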
if args.detect:
cfg.eval_mask_branch = False
if args.dataset is not None:
set_dataset(args.dataset)
with torch.no_grad():
if not os.path.exists('results'):
os.makedirs('results')
if args.cuda:
cudnn.fastest = True
torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
torch.set_default_tensor_type('torch.FloatTensor')
if args.resume and not args.display:
with open(args.ap_data_file, 'rb') as f:
ap_data = pickle.load(f)
calc_map(ap_data)
exit()
if args.image is None and args.video is None and args.images is None:
dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info,
transform=BaseTransform(), has_gt=cfg.dataset.has_gt)
prep_coco_cats()
else:
dataset = None
print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')
if args.cuda:
net = net.cuda()
evaluate(net, dataset)
================================================
FILE: external/DCNv2/LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2019, Charles Shang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: external/DCNv2/README.md
================================================
## Deformable Convolutional Networks V2 with Pytorch 1.0
### Build
```bash
./make.sh # build
python test.py # run examples and gradient check
```
### An Example
- deformable conv
```python
import torch
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
print(output.shape)
```
- deformable roi pooling
```python
import torch
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# modulated deformable pooling (V2)
# wrap all things (offset and mask) in DCNPooling
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
```
### Note
Now the master branch is for pytorch 1.0 (new ATen API); you can switch back to pytorch 0.4 with:
```bash
git checkout pytorch_0.4
```
### Known Issues:
- [x] Gradient check w.r.t offset (solved)
- [ ] Backward is not reentrant (minor)
This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
<s>I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes.
However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some
non-differential points? </s>
Update: all gradient check passes with double precision.
Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
float, `<1e-15` for double),
so it may not be a serious problem (?)
Please post an issue or PR if you have any comments.
================================================
FILE: external/DCNv2/dcn_v2.py
================================================
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import math
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from torch.autograd.function import once_differentiable
import _ext as _backend
class _DCNv2(Function):
@staticmethod
def forward(ctx, input, offset, mask, weight, bias,
stride, padding, dilation, deformable_groups):
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.kernel_size = _pair(weight.shape[2:4])
ctx.deformable_groups = deformable_groups
output = _backend.dcn_v2_forward(input, weight, bias,
offset, mask,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
ctx.save_for_backward(input, offset, mask, weight, bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \
_backend.dcn_v2_backward(input, weight,
bias,
offset, mask,
grad_output,
ctx.kernel_size[0], ctx.kernel_size[1],
ctx.stride[0], ctx.stride[1],
ctx.padding[0], ctx.padding[1],
ctx.dilation[0], ctx.dilation[1],
ctx.deformable_groups)
return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\
None, None, None, None,
dcn_v2_conv = _DCNv2.apply
class DCNv2(nn.Module):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding, dilation=1, deformable_groups=1):
super(DCNv2, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.deformable_groups = deformable_groups
self.weight = nn.Parameter(torch.Tensor(
out_channels, in_channels, *self.kernel_size))
self.bias = nn.Parameter(torch.Tensor(out_channels))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
self.bias.data.zero_()
def forward(self, input, offset, mask):
assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
offset.shape[1]
assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \
mask.shape[1]
return dcn_v2_conv(input, offset, mask,
self.weight,
self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
class DCN(DCNv2):
def __init__(self, in_channels, out_channels,
kernel_size, stride, padding,
dilation=1, deformable_groups=1):
super(DCN, self).__init__(in_channels, out_channels,
kernel_size, stride, padding, dilation, deformable_groups)
channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]
self.conv_offset_mask = nn.Conv2d(self.in_channels,
channels_,
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset_mask.weight.data.zero_()
self.conv_offset_mask.bias.data.zero_()
def forward(self, input):
out = self.conv_offset_mask(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return dcn_v2_conv(input, offset, mask,
self.weight, self.bias,
self.stride,
self.padding,
self.dilation,
self.deformable_groups)
class _DCNv2Pooling(Function):
@staticmethod
def forward(ctx, input, rois, offset,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
ctx.spatial_scale = spatial_scale
ctx.no_trans = int(no_trans)
ctx.output_dim = output_dim
ctx.group_size = group_size
ctx.pooled_size = pooled_size
ctx.part_size = pooled_size if part_size is None else part_size
ctx.sample_per_part = sample_per_part
ctx.trans_std = trans_std
output, output_count = \
_backend.dcn_v2_psroi_pooling_forward(input, rois, offset,
ctx.no_trans, ctx.spatial_scale,
ctx.output_dim, ctx.group_size,
ctx.pooled_size, ctx.part_size,
ctx.sample_per_part, ctx.trans_std)
ctx.save_for_backward(input, rois, offset, output_count)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, rois, offset, output_count = ctx.saved_tensors
grad_input, grad_offset = \
_backend.dcn_v2_psroi_pooling_backward(grad_output,
input,
rois,
offset,
output_count,
ctx.no_trans,
ctx.spatial_scale,
ctx.output_dim,
ctx.group_size,
ctx.pooled_size,
ctx.part_size,
ctx.sample_per_part,
ctx.trans_std)
return grad_input, None, grad_offset, \
None, None, None, None, None, None, None, None
dcn_v2_pooling = _DCNv2Pooling.apply
class DCNv2Pooling(nn.Module):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DCNv2Pooling, self).__init__()
self.spatial_scale = spatial_scale
self.pooled_size = pooled_size
self.output_dim = output_dim
self.no_trans = no_trans
self.group_size = group_size
self.part_size = pooled_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
def forward(self, input, rois, offset):
assert input.shape[1] == self.output_dim
if self.no_trans:
offset = input.new()
return dcn_v2_pooling(input, rois, offset,
self.spatial_scale,
self.pooled_size,
self.output_dim,
self.no_trans,
self.group_size,
self.part_size,
self.sample_per_part,
self.trans_std)
class DCNPooling(DCNv2Pooling):
def __init__(self,
spatial_scale,
pooled_size,
output_dim,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
deform_fc_dim=1024):
super(DCNPooling, self).__init__(spatial_scale, pooled_size, output_dim, no_trans,
group_size, part_size, sample_per_part, trans_std)
self.deform_fc_dim = deform_fc_dim
================================================
SYMBOL INDEX (503 symbols across 36 files)
================================================
FILE: backbone.py
function DCN (line 10) | def DCN(*args, **kwdargs):
class Bottleneck (line 13) | class Bottleneck(nn.Module):
method __init__ (line 17) | def __init__(self, inplanes, planes, stride=1, downsample=None, norm_l...
method forward (line 37) | def forward(self, x):
class ResNetBackbone (line 60) | class ResNetBackbone(nn.Module):
method __init__ (line 63) | def __init__(self, layers, dcn_layers=[0, 0, 0, 0], dcn_interval=1, at...
method _make_layer (line 94) | def _make_layer(self, block, planes, blocks, stride=1, dcn_layers=0, d...
method forward (line 126) | def forward(self, x):
method init_backbone (line 141) | def init_backbone(self, path):
method add_layer (line 156) | def add_layer(self, conv_channels=1024, downsample=2, depth=1, block=B...
class ResNetBackboneGN (line 163) | class ResNetBackboneGN(ResNetBackbone):
method __init__ (line 165) | def __init__(self, layers, num_groups=32):
method init_backbone (line 168) | def init_backbone(self, path):
function darknetconvlayer (line 222) | def darknetconvlayer(in_channels, out_channels, *args, **kwdargs):
class DarkNetBlock (line 235) | class DarkNetBlock(nn.Module):
method __init__ (line 240) | def __init__(self, in_channels, channels):
method forward (line 246) | def forward(self, x):
class DarkNetBackbone (line 252) | class DarkNetBackbone(nn.Module):
method __init__ (line 260) | def __init__(self, layers=[1, 2, 8, 8, 4], block=DarkNetBlock):
method _make_layer (line 283) | def _make_layer(self, block, channels, num_blocks, stride=2):
method forward (line 299) | def forward(self, x):
method add_layer (line 311) | def add_layer(self, conv_channels=1024, stride=2, depth=1, block=DarkN...
method init_backbone (line 315) | def init_backbone(self, path):
class VGGBackbone (line 324) | class VGGBackbone(nn.Module):
method __init__ (line 335) | def __init__(self, cfg, extra_args=[], norm_layers=[]):
method _make_layer (line 359) | def _make_layer(self, cfg):
method forward (line 400) | def forward(self, x):
method transform_key (line 415) | def transform_key(self, k):
method init_backbone (line 421) | def init_backbone(self, path):
method add_layer (line 428) | def add_layer(self, conv_channels=128, downsample=2):
function construct_backbone (line 449) | def construct_backbone(cfg):
FILE: data/coco.py
function get_label_map (line 13) | def get_label_map():
class COCOAnnotationTransform (line 19) | class COCOAnnotationTransform(object):
method __init__ (line 23) | def __init__(self):
method __call__ (line 26) | def __call__(self, target, width, height):
class COCODetection (line 52) | class COCODetection(data.Dataset):
method __init__ (line 64) | def __init__(self, image_path, info_file, transform=None,
method __getitem__ (line 86) | def __getitem__(self, index):
method __len__ (line 97) | def __len__(self):
method pull_item (line 100) | def pull_item(self, index):
method pull_image (line 178) | def pull_image(self, index):
method pull_anno (line 193) | def pull_anno(self, index):
method __repr__ (line 209) | def __repr__(self):
function enforce_size (line 219) | def enforce_size(img, targets, masks, num_crowds, new_w, new_h):
function detection_collate (line 260) | def detection_collate(batch):
FILE: data/config.py
class Config (line 61) | class Config(object):
method __init__ (line 70) | def __init__(self, config_dict):
method copy (line 74) | def copy(self, new_config_dict={}):
method replace (line 87) | def replace(self, new_config_dict):
method print (line 98) | def print(self):
function set_cfg (line 812) | def set_cfg(config_name:str):
function set_dataset (line 823) | def set_dataset(dataset_name:str):
FILE: eval.py
function str2bool (line 32) | def str2bool(v):
function parse_args (line 40) | def parse_args(argv=None):
function prep_display (line 145) | def prep_display(dets_out, img, h, w, undo_transform=True, class_color=F...
function prep_benchmark (line 274) | def prep_benchmark(dets_out, h, w):
function prep_coco_cats (line 293) | def prep_coco_cats():
function get_coco_cat (line 301) | def get_coco_cat(transformed_cat_id):
function get_transformed_cat (line 305) | def get_transformed_cat(coco_cat_id):
class Detections (line 310) | class Detections:
method __init__ (line 312) | def __init__(self):
method add_bbox (line 316) | def add_bbox(self, image_id:int, category_id:int, bbox:list, score:flo...
method add_mask (line 330) | def add_mask(self, image_id:int, category_id:int, segmentation:np.ndar...
method dump (line 342) | def dump(self):
method dump_web (line 352) | def dump_web(self):
function _mask_iou (line 386) | def _mask_iou(mask1, mask2, iscrowd=False):
function _bbox_iou (line 391) | def _bbox_iou(bbox1, bbox2, iscrowd=False):
function prep_metrics (line 396) | def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, imag...
class APDataObject (line 523) | class APDataObject:
method __init__ (line 529) | def __init__(self):
method push (line 533) | def push(self, score:float, is_true:bool):
method add_gt_positives (line 536) | def add_gt_positives(self, num_positives:int):
method is_empty (line 540) | def is_empty(self) -> bool:
method get_ap (line 543) | def get_ap(self) -> float:
function badhash (line 593) | def badhash(x):
function evalimage (line 605) | def evalimage(net:Yolact, path:str, save_path:str=None):
function evalimages (line 622) | def evalimages(net:Yolact, input_folder:str, output_folder:str):
class CustomDataParallel (line 640) | class CustomDataParallel(torch.nn.DataParallel):
method gather (line 642) | def gather(self, outputs, output_device):
function evalvideo (line 646) | def evalvideo(net:Yolact, path:str, out_path:str=None):
function evaluate (line 880) | def evaluate(net:Yolact, dataset, train_mode=False):
function calc_map (line 1021) | def calc_map(ap_data):
function print_maps (line 1049) | def print_maps(all_maps):
FILE: external/DCNv2/dcn_v2.py
class _DCNv2 (line 16) | class _DCNv2(Function):
method forward (line 18) | def forward(ctx, input, offset, mask, weight, bias,
method backward (line 37) | def backward(ctx, grad_output):
class DCNv2 (line 57) | class DCNv2(nn.Module):
method __init__ (line 59) | def __init__(self, in_channels, out_channels,
method reset_parameters (line 75) | def reset_parameters(self):
method forward (line 83) | def forward(self, input, offset, mask):
class DCN (line 97) | class DCN(DCNv2):
method __init__ (line 99) | def __init__(self, in_channels, out_channels,
method init_offset (line 114) | def init_offset(self):
method forward (line 118) | def forward(self, input):
class _DCNv2Pooling (line 132) | class _DCNv2Pooling(Function):
method forward (line 134) | def forward(ctx, input, rois, offset,
method backward (line 163) | def backward(ctx, grad_output):
class DCNv2Pooling (line 187) | class DCNv2Pooling(nn.Module):
method __init__ (line 189) | def __init__(self,
method forward (line 208) | def forward(self, input, rois, offset):
class DCNPooling (line 223) | class DCNPooling(DCNv2Pooling):
method __init__ (line 225) | def __init__(self,
method forward (line 259) | def forward(self, input, rois):
FILE: external/DCNv2/setup.py
function get_extensions (line 17) | def get_extensions():
FILE: external/DCNv2/src/cpu/dcn_v2_cpu.cpp
function dcn_v2_cpu_forward (line 7) | at::Tensor
function dcn_v2_cpu_backward (line 26) | std::vector<at::Tensor>
function dcn_v2_psroi_pooling_cpu_forward (line 42) | std::tuple<at::Tensor, at::Tensor>
function dcn_v2_psroi_pooling_cpu_backward (line 58) | std::tuple<at::Tensor, at::Tensor>
FILE: external/DCNv2/src/vision.cpp
function PYBIND11_MODULE (line 4) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: external/DCNv2/test.py
function conv_identify (line 20) | def conv_identify(weight, bias):
function check_zero_offset (line 32) | def check_zero_offset():
function check_gradient_dconv (line 69) | def check_gradient_dconv():
function check_pooling_zero_offset (line 100) | def check_pooling_zero_offset():
function check_gradient_dpooling (line 134) | def check_gradient_dpooling():
function example_dconv (line 169) | def example_dconv():
function example_dpooling (line 183) | def example_dpooling():
function example_mdpooling (line 226) | def example_mdpooling():
FILE: layers/box_utils.py
function point_form (line 8) | def point_form(boxes):
function center_size (line 21) | def center_size(boxes):
function intersect (line 33) | def intersect(box_a, box_b):
function jaccard (line 54) | def jaccard(box_a, box_b, iscrowd:bool=False):
function diou (line 82) | def diou(box_a, box_b, iscrowd:bool=False):
function distance (line 128) | def distance(box_a, box_b, iscrowd:bool=False):
function elemwise_box_iou (line 170) | def elemwise_box_iou(box_a, box_b):
function mask_iou (line 186) | def mask_iou(masks_a, masks_b, iscrowd=False):
function elemwise_mask_iou (line 203) | def elemwise_mask_iou(masks_a, masks_b):
function change (line 217) | def change(gt, priors):
function match (line 247) | def match(pos_thresh, neg_thresh, truths, priors, labels, crowd_boxes, l...
function encode (line 318) | def encode(matched, priors, use_yolo_regressors:bool=False):
function decode (line 356) | def decode(loc, priors, use_yolo_regressors:bool=False):
function log_sum_exp (line 404) | def log_sum_exp(x):
function sanitize_coordinates (line 416) | def sanitize_coordinates(_x1, _x2, img_size:int, padding:int=0, cast:boo...
function crop (line 438) | def crop(masks, boxes, padding:int=1):
function index2d (line 464) | def index2d(src, idx):
FILE: layers/functions/detection.py
class Detect (line 11) | class Detect(object):
method __init__ (line 19) | def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thr...
method __call__ (line 37) | def __call__(self, predictions, net):
method detect (line 86) | def detect(self, batch_idx, conf_preds, decoded_boxes, mask_data, inst...
method cc_fast_nms (line 133) | def cc_fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, t...
method cc_cluster_nms (line 144) | def cc_cluster_nms(self, boxes, masks, scores, iou_threshold:float=0.5...
method cc_cluster_diounms (line 162) | def cc_cluster_diounms(self, boxes, masks, scores, iou_threshold:float...
method cc_cluster_SPM_nms (line 180) | def cc_cluster_SPM_nms(self, boxes, masks, scores, iou_threshold:float...
method cc_cluster_SPM_dist_nms (line 203) | def cc_cluster_SPM_dist_nms(self, boxes, masks, scores, iou_threshold:...
method cc_cluster_SPM_dist_weighted_nms (line 229) | def cc_cluster_SPM_dist_weighted_nms(self, boxes, masks, scores, iou_t...
method fast_nms (line 267) | def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_...
method cluster_nms (line 310) | def cluster_nms(self, boxes, masks, scores, iou_threshold:float=0.5, t...
method cluster_diounms (line 346) | def cluster_diounms(self, boxes, masks, scores, iou_threshold:float=0....
method cluster_SPM_nms (line 381) | def cluster_SPM_nms(self, boxes, masks, scores, iou_threshold:float=0....
method cluster_SPM_dist_nms (line 421) | def cluster_SPM_dist_nms(self, boxes, masks, scores, iou_threshold:flo...
method cluster_SPM_dist_weighted_nms (line 462) | def cluster_SPM_dist_weighted_nms(self, boxes, masks, scores, iou_thre...
method traditional_nms_yolact (line 519) | def traditional_nms_yolact(self, boxes, masks, scores, iou_threshold=0...
method traditional_nms_ours (line 567) | def traditional_nms_ours(self, boxes, masks, scores, iou_threshold=0.5...
FILE: layers/interpolate.py
class InterpolateModule (line 4) | class InterpolateModule(nn.Module):
method __init__ (line 10) | def __init__(self, *args, **kwdargs):
method forward (line 16) | def forward(self, x):
FILE: layers/modules/multibox_loss.py
function ciou (line 11) | def ciou(bboxes1, bboxes2):
function diou (line 62) | def diou(bboxes1, bboxes2):
class MultiBoxLoss (line 109) | class MultiBoxLoss(nn.Module):
method __init__ (line 132) | def __init__(self, num_classes, pos_threshold, neg_threshold, negpos_r...
method forward (line 149) | def forward(self, net, predictions, targets, masks, num_crowds):
method class_existence_loss (line 320) | def class_existence_loss(self, class_data, class_existence_t):
method semantic_segmentation_loss (line 323) | def semantic_segmentation_loss(self, segment_data, mask_t, class_t, in...
method ohem_conf_loss (line 347) | def ohem_conf_loss(self, conf_data, conf_t, pos, num):
method focal_conf_loss (line 403) | def focal_conf_loss(self, conf_data, conf_t):
method focal_conf_sigmoid_loss (line 433) | def focal_conf_sigmoid_loss(self, conf_data, conf_t):
method focal_conf_objectness_loss (line 464) | def focal_conf_objectness_loss(self, conf_data, conf_t):
method conf_objectness_loss (line 497) | def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, l...
method direct_mask_loss (line 536) | def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors...
method coeff_diversity_loss (line 580) | def coeff_diversity_loss(self, coeffs, instance_t):
method lincomb_mask_loss (line 604) | def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, p...
method _mask_iou (line 781) | def _mask_iou(self, mask1, mask2):
method mask_iou_loss (line 789) | def mask_iou_loss(self, net, maskiou_targets):
FILE: layers/output_utils.py
function postprocess (line 15) | def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilin...
function undo_image_transformation (line 128) | def undo_image_transformation(img, w, h):
function display_lincomb (line 147) | def display_lincomb(proto_data, masks):
FILE: scripts/augment_bbox.py
function augment_boxes (line 18) | def augment_boxes(bboxes):
function prep_box (line 27) | def prep_box(box_list):
function intersect (line 91) | def intersect(box_a, box_b):
function jaccard_numpy (line 98) | def jaccard_numpy(box_a, box_b):
function random_sample_crop (line 118) | def random_sample_crop(height, width, boxes=None):
FILE: scripts/bbox_recall.py
function intersect (line 24) | def intersect(box_a, box_b):
function jaccard (line 45) | def jaccard(box_a, box_b, iscrowd=False):
function to_relative (line 70) | def to_relative(bboxes):
function make_priors (line 74) | def make_priors(conv_size, scales, aspect_ratios):
FILE: scripts/cluster_bbox_sizes.py
function to_relative (line 21) | def to_relative(bboxes):
function process (line 24) | def process(bboxes):
FILE: scripts/compute_masks.py
function mask_iou (line 10) | def mask_iou(mask1, mask2):
function paint_mask (line 22) | def paint_mask(img_numpy, mask, color):
function logit (line 41) | def logit(x):
function sigmoid (line 44) | def sigmoid(x):
FILE: scripts/convert_sbd.py
function mask2bbox (line 6) | def mask2bbox(mask):
FILE: scripts/make_grid.py
function render (line 27) | def render():
function update_scale (line 63) | def update_scale(val):
function update_angle (line 69) | def update_angle(val):
function update_centerx (line 75) | def update_centerx(val):
function update_centery (line 81) | def update_centery(val):
function update_spacing (line 87) | def update_spacing(val):
function randomize (line 93) | def randomize(val):
function add (line 113) | def add(val):
function add_randomize (line 123) | def add_randomize(val):
function export (line 127) | def export(val):
function test_uniqueness (line 138) | def test_uniqueness(grids):
FILE: scripts/optimize_bboxes.py
function intersect (line 24) | def intersect(box_a, box_b):
function jaccard (line 45) | def jaccard(box_a, box_b, iscrowd=False):
function to_relative (line 70) | def to_relative(bboxes):
function make_priors (line 74) | def make_priors(conv_size, scales, aspect_ratios):
function compute_hits (line 104) | def compute_hits(bboxes, anchors, iou_threshold=0.5):
function compute_recall (line 110) | def compute_recall(hits, base_hits):
function step (line 115) | def step(x, x_func, bboxes, base_hits, optim_idx):
function optimize (line 124) | def optimize(full_bboxes, optim_idx, batch_size=5000):
function pretty_str (line 157) | def pretty_str(x:list):
function print_out (line 182) | def print_out():
FILE: scripts/parse_eval.py
function grabMAP (line 20) | def grabMAP(string):
FILE: scripts/plot_loss.py
function smoother (line 35) | def smoother(y, interval=100):
function plot_train (line 44) | def plot_train(data):
function plot_val (line 63) | def plot_val(data):
FILE: train.py
function str2bool (line 27) | def str2bool(v):
function replace (line 101) | def replace(name):
class NetLoss (line 132) | class NetLoss(nn.Module):
method __init__ (line 138) | def __init__(self, net:Yolact, criterion:MultiBoxLoss):
method forward (line 144) | def forward(self, images, targets, masks, num_crowds):
class CustomDataParallel (line 149) | class CustomDataParallel(nn.DataParallel):
method scatter (line 155) | def scatter(self, inputs, kwargs, device_ids):
method gather (line 164) | def gather(self, outputs, output_device):
function train (line 172) | def train():
function set_lr (line 388) | def set_lr(optimizer, new_lr):
function gradinator (line 395) | def gradinator(x):
function prepare_data (line 399) | def prepare_data(datum, devices:list=None, allocation:list=None):
function no_inf_mean (line 439) | def no_inf_mean(x:torch.Tensor):
function compute_validation_loss (line 452) | def compute_validation_loss(net, data_loader, criterion):
function compute_validation_map (line 485) | def compute_validation_map(epoch, iteration, yolact_net, dataset, log:Lo...
function setup_eval (line 500) | def setup_eval():
FILE: utils/augmentations.py
function intersect (line 12) | def intersect(box_a, box_b):
function jaccard_numpy (line 19) | def jaccard_numpy(box_a, box_b):
class Compose (line 39) | class Compose(object):
method __init__ (line 50) | def __init__(self, transforms):
method __call__ (line 53) | def __call__(self, img, masks=None, boxes=None, labels=None):
class Lambda (line 59) | class Lambda(object):
method __init__ (line 62) | def __init__(self, lambd):
method __call__ (line 66) | def __call__(self, img, masks=None, boxes=None, labels=None):
class ConvertFromInts (line 70) | class ConvertFromInts(object):
method __call__ (line 71) | def __call__(self, image, masks=None, boxes=None, labels=None):
class ToAbsoluteCoords (line 76) | class ToAbsoluteCoords(object):
method __call__ (line 77) | def __call__(self, image, masks=None, boxes=None, labels=None):
class ToPercentCoords (line 87) | class ToPercentCoords(object):
method __call__ (line 88) | def __call__(self, image, masks=None, boxes=None, labels=None):
class Pad (line 98) | class Pad(object):
method __init__ (line 105) | def __init__(self, width, height, mean=MEANS, pad_gt=True):
method __call__ (line 111) | def __call__(self, image, masks, boxes=None, labels=None):
class Resize (line 129) | class Resize(object):
method calc_size_preserve_ar (line 133) | def calc_size_preserve_ar(img_w, img_h, max_size):
method __init__ (line 140) | def __init__(self, resize_gt=True):
method __call__ (line 145) | def __call__(self, image, masks, boxes, labels=None):
class RandomSaturation (line 183) | class RandomSaturation(object):
method __init__ (line 184) | def __init__(self, lower=0.5, upper=1.5):
method __call__ (line 190) | def __call__(self, image, masks=None, boxes=None, labels=None):
class RandomHue (line 197) | class RandomHue(object):
method __init__ (line 198) | def __init__(self, delta=18.0):
method __call__ (line 202) | def __call__(self, image, masks=None, boxes=None, labels=None):
class RandomLightingNoise (line 210) | class RandomLightingNoise(object):
method __init__ (line 211) | def __init__(self):
method __call__ (line 216) | def __call__(self, image, masks=None, boxes=None, labels=None):
class ConvertColor (line 226) | class ConvertColor(object):
method __init__ (line 227) | def __init__(self, current='BGR', transform='HSV'):
method __call__ (line 231) | def __call__(self, image, masks=None, boxes=None, labels=None):
class RandomContrast (line 241) | class RandomContrast(object):
method __init__ (line 242) | def __init__(self, lower=0.5, upper=1.5):
method __call__ (line 249) | def __call__(self, image, masks=None, boxes=None, labels=None):
class RandomBrightness (line 256) | class RandomBrightness(object):
method __init__ (line 257) | def __init__(self, delta=32):
method __call__ (line 262) | def __call__(self, image, masks=None, boxes=None, labels=None):
class ToCV2Image (line 269) | class ToCV2Image(object):
method __call__ (line 270) | def __call__(self, tensor, masks=None, boxes=None, labels=None):
class ToTensor (line 274) | class ToTensor(object):
method __call__ (line 275) | def __call__(self, cvimage, masks=None, boxes=None, labels=None):
class RandomSampleCrop (line 279) | class RandomSampleCrop(object):
method __init__ (line 292) | def __init__(self):
method __call__ (line 305) | def __call__(self, image, masks, boxes=None, labels=None):
class Expand (line 408) | class Expand(object):
method __init__ (line 409) | def __init__(self, mean):
method __call__ (line 412) | def __call__(self, image, masks, boxes, labels):
class RandomMirror (line 443) | class RandomMirror(object):
method __call__ (line 444) | def __call__(self, image, masks, boxes, labels):
class RandomFlip (line 454) | class RandomFlip(object):
method __call__ (line 455) | def __call__(self, image, masks, boxes, labels):
class RandomRot90 (line 465) | class RandomRot90(object):
method __call__ (line 466) | def __call__(self, image, masks, boxes, labels):
class SwapChannels (line 478) | class SwapChannels(object):
method __init__ (line 486) | def __init__(self, swaps):
method __call__ (line 489) | def __call__(self, image):
class PhotometricDistort (line 504) | class PhotometricDistort(object):
method __init__ (line 505) | def __init__(self):
method __call__ (line 517) | def __call__(self, image, masks, boxes, labels):
class PrepareMasks (line 527) | class PrepareMasks(object):
method __init__ (line 534) | def __init__(self, mask_size, use_gt_bboxes):
method __call__ (line 538) | def __call__(self, image, masks, boxes, labels=None):
class BackboneTransform (line 566) | class BackboneTransform(object):
method __init__ (line 574) | def __init__(self, transform, mean, std, in_channel_order):
method __call__ (line 583) | def __call__(self, img, masks=None, boxes=None, labels=None):
class BaseTransform (line 601) | class BaseTransform(object):
method __init__ (line 604) | def __init__(self, mean=MEANS, std=STD):
method __call__ (line 611) | def __call__(self, img, masks=None, boxes=None, labels=None):
class FastBaseTransform (line 616) | class FastBaseTransform(torch.nn.Module):
method __init__ (line 623) | def __init__(self):
method forward (line 630) | def forward(self, img):
function do_nothing (line 660) | def do_nothing(img=None, masks=None, boxes=None, labels=None):
function enable_if (line 664) | def enable_if(condition, obj):
class SSDAugmentation (line 667) | class SSDAugmentation(object):
method __init__ (line 670) | def __init__(self, mean=MEANS, std=STD):
method __call__ (line 687) | def __call__(self, img, masks, boxes, labels):
FILE: utils/functions.py
class MovingAverage (line 9) | class MovingAverage():
method __init__ (line 12) | def __init__(self, max_window_size=1000):
method add (line 16) | def add(self, elem):
method append (line 28) | def append(self, elem):
method reset (line 32) | def reset(self):
method get_avg (line 37) | def get_avg(self):
method __str__ (line 41) | def __str__(self):
method __repr__ (line 44) | def __repr__(self):
method __len__ (line 47) | def __len__(self):
class ProgressBar (line 51) | class ProgressBar():
method __init__ (line 54) | def __init__(self, length, max_val):
method set_val (line 62) | def set_val(self, new_val):
method is_finished (line 72) | def is_finished(self):
method _update_str (line 75) | def _update_str(self):
method __repr__ (line 82) | def __repr__(self):
method __str__ (line 85) | def __str__(self):
function init_console (line 89) | def init_console():
class SavePath (line 98) | class SavePath:
method __init__ (line 105) | def __init__(self, model_name:str, epoch:int, iteration:int):
method get_path (line 110) | def get_path(self, root:str=''):
method from_str (line 115) | def from_str(path:str):
method remove_interrupt (line 133) | def remove_interrupt(save_folder):
method get_interrupt (line 138) | def get_interrupt(save_folder):
method get_latest (line 144) | def get_latest(save_folder, config):
function make_net (line 163) | def make_net(in_channels, conf, include_last_relu=True):
FILE: utils/logger.py
class Log (line 21) | class Log:
method __init__ (line 34) | def __init__(self, log_name:str, log_dir:str='logs/', session_data:dic...
method _log_session_header (line 72) | def _log_session_header(self, session_data:dict):
method log (line 98) | def log(self, type:str, data:dict={}, **kwdargs):
class LogEntry (line 130) | class LogEntry():
method __init__ (line 133) | def __init__(self, entry:Union[dict, list]):
method __getattr__ (line 136) | def __getattr__(self, name):
method __getitem__ (line 147) | def __getitem__(self, name):
method __len__ (line 150) | def __len__(self):
class LogVisualizer (line 153) | class LogVisualizer():
method __init__ (line 169) | def __init__(self):
method _decode (line 174) | def _decode(self, query:str) -> list:
method _follow (line 189) | def _follow(self, entry:LogEntry, query:list):
method _color (line 205) | def _color(self, idx:int):
method sessions (line 208) | def sessions(self, path:str):
method add (line 241) | def add(self, path:str, session:Union[int,list]=None):
method query (line 290) | def query(self, x:Union[str, list], entry_type:str=None, x_idx:int=Non...
method check (line 321) | def check(self, entry_type:str, x:str):
method plot (line 338) | def plot(self, entry_type:str, x:str, y:str, smoothness:int=0):
method bar (line 375) | def bar(self, entry_type:str, x:str, labels:list=None, diff:bool=False...
method elapsed_time (line 454) | def elapsed_time(self, cond1:str='', cond2:str='', legible:bool=True) ...
FILE: utils/nvinfo.py
function gpu_info (line 7) | def gpu_info() -> list:
function nvsmi_available (line 43) | def nvsmi_available() -> bool:
function visible_gpus (line 48) | def visible_gpus() -> list:
function _run_cmd (line 59) | def _run_cmd(cmd:list) -> list:
FILE: utils/timer.py
function disable_all (line 11) | def disable_all():
function enable_all (line 15) | def enable_all():
function disable (line 19) | def disable(fn_name):
function enable (line 23) | def enable(fn_name):
function reset (line 27) | def reset():
function start (line 35) | def start(fn_name, use_stack=True):
function stop (line 55) | def stop(fn_name=None, use_stack=True):
function print_stats (line 85) | def print_stats():
function total_time (line 109) | def total_time():
class env (line 114) | class env():
method __init__ (line 122) | def __init__(self, fn_name, use_stack=True):
method __enter__ (line 126) | def __enter__(self):
method __exit__ (line 129) | def __exit__(self, e, ev, t):
FILE: web/scripts/iou.js
function Box2 (line 69) | function Box2() {
function addRect (line 152) | function addRect(x, y, w, h, fill) {
function init2 (line 165) | function init2() {
function clear (line 222) | function clear(c) {
function mainDraw (line 229) | function mainDraw() {
function computeIoU (line 248) | function computeIoU(a, b) {
function myMove (line 269) | function myMove(e){
function myDown (line 383) | function myDown(e){
function myUp (line 425) | function myUp(){
function myDblClick (line 432) | function myDblClick(e) {
function invalidate (line 442) | function invalidate() {
function getMouse (line 448) | function getMouse(e) {
FILE: web/scripts/jquery.js
function m (line 2) | function m(e,t,n){var i,o=(t=t||r).createElement("script");if(o.text=e,n...
function x (line 2) | function x(e){return null==e?e+"":"object"==typeof e||"function"==typeof...
function C (line 2) | function C(e){var t=!!e&&"length"in e&&e.length,n=x(e);return!g(e)&&!y(e...
function oe (line 2) | function oe(e,t,r,i){var o,s,l,c,f,h,v,m=t&&t.ownerDocument,T=t?t.nodeTy...
function ae (line 2) | function ae(){var e=[];function t(n,i){return e.push(n+" ")>r.cacheLengt...
function se (line 2) | function se(e){return e[b]=!0,e}
function ue (line 2) | function ue(e){var t=d.createElement("fieldset");try{return!!e(t)}catch(...
function le (line 2) | function le(e,t){var n=e.split("|"),i=n.length;while(i--)r.attrHandle[n[...
function ce (line 2) | function ce(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourc...
function fe (line 2) | function fe(e){return function(t){return"input"===t.nodeName.toLowerCase...
function pe (line 2) | function pe(e){return function(t){var n=t.nodeName.toLowerCase();return(...
function de (line 2) | function de(e){return function(t){return"form"in t?t.parentNode&&!1===t....
function he (line 2) | function he(e){return se(function(t){return t=+t,se(function(n,r){var i,...
function ge (line 2) | function ge(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}
function ye (line 2) | function ye(){}
function ve (line 2) | function ve(e){for(var t=0,n=e.length,r="";t<n;t++)r+=e[t].value;return r}
function me (line 2) | function me(e,t,n){var r=t.dir,i=t.next,o=i||r,a=n&&"parentNode"===o,s=C...
function xe (line 2) | function xe(e){return e.length>1?function(t,n,r){var i=e.length;while(i-...
function be (line 2) | function be(e,t,n){for(var r=0,i=t.length;r<i;r++)oe(e,t[r],n);return n}
function we (line 2) | function we(e,t,n,r,i){for(var o,a=[],s=0,u=e.length,l=null!=t;s<u;s++)(...
function Te (line 2) | function Te(e,t,n,r,i,o){return r&&!r[b]&&(r=Te(r)),i&&!i[b]&&(i=Te(i,o)...
function Ce (line 2) | function Ce(e){for(var t,n,i,o=e.length,a=r.relative[e[0].type],s=a||r.r...
function Ee (line 2) | function Ee(e,t){var n=t.length>0,i=e.length>0,o=function(o,a,s,u,c){var...
function N (line 2) | function N(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerC...
function j (line 2) | function j(e,t,n){return g(t)?w.grep(e,function(e,r){return!!t.call(e,r,...
function P (line 2) | function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}
function R (line 2) | function R(e){var t={};return w.each(e.match(M)||[],function(e,n){t[n]=!...
function I (line 2) | function I(e){return e}
function W (line 2) | function W(e){throw e}
function $ (line 2) | function $(e,t,n,r){var i;try{e&&g(i=e.promise)?i.call(e).done(t).fail(n...
function a (line 2) | function a(t,n,r,i){return function(){var s=this,u=arguments,l=function(...
function _ (line 2) | function _(){r.removeEventListener("DOMContentLoaded",_),e.removeEventLi...
function V (line 2) | function V(e,t){return t.toUpperCase()}
function G (line 2) | function G(e){return e.replace(X,"ms-").replace(U,V)}
function Q (line 2) | function Q(){this.expando=w.expando+Q.uid++}
function te (line 2) | function te(e){return"true"===e||"false"!==e&&("null"===e?null:e===+e+""...
function ne (line 2) | function ne(e,t,n){var r;if(void 0===n&&1===e.nodeType)if(r="data-"+t.re...
function ue (line 2) | function ue(e,t,n,r){var i,o,a=20,s=r?function(){return r.cur()}:functio...
function ce (line 2) | function ce(e){var t,n=e.ownerDocument,r=e.nodeName,i=le[r];return i||(t...
function fe (line 2) | function fe(e,t){for(var n,r,i=[],o=0,a=e.length;o<a;o++)(r=e[o]).style&...
function ye (line 2) | function ye(e,t){var n;return n="undefined"!=typeof e.getElementsByTagNa...
function ve (line 2) | function ve(e,t){for(var n=0,r=e.length;n<r;n++)J.set(e[n],"globalEval",...
function xe (line 2) | function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),...
function Ee (line 2) | function Ee(){return!0}
function ke (line 2) | function ke(){return!1}
function Se (line 2) | function Se(){try{return r.activeElement}catch(e){}}
function De (line 2) | function De(e,t,n,r,i,o){var a,s;if("object"==typeof t){"string"!=typeof...
function Le (line 2) | function Le(e,t){return N(e,"table")&&N(11!==t.nodeType?t:t.firstChild,"...
function He (line 2) | function He(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}
function Oe (line 2) | function Oe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.sli...
function Pe (line 2) | function Pe(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(J.hasData(e)&...
function Me (line 2) | function Me(e,t){var n=t.nodeName.toLowerCase();"input"===n&&pe.test(e.t...
function Re (line 2) | function Re(e,t,n,r){t=a.apply([],t);var i,o,s,u,l,c,f=0,p=e.length,d=p-...
function Ie (line 2) | function Ie(e,t,n){for(var r,i=t?w.filter(t,e):e,o=0;null!=(r=i[o]);o++)...
function t (line 2) | function t(){if(c){l.style.cssText="position:absolute;left:-11111px;widt...
function n (line 2) | function n(e){return Math.round(parseFloat(e))}
function Fe (line 2) | function Fe(e,t,n){var r,i,o,a,s=e.style;return(n=n||$e(e))&&(""!==(a=n....
function _e (line 2) | function _e(e,t){return{get:function(){if(!e())return(this.get=t).apply(...
function Qe (line 2) | function Qe(e){if(e in Ye)return e;var t=e[0].toUpperCase()+e.slice(1),n...
function Je (line 2) | function Je(e){var t=w.cssProps[e];return t||(t=w.cssProps[e]=Qe(e)||e),t}
function Ke (line 2) | function Ke(e,t,n){var r=ie.exec(t);return r?Math.max(0,r[2]-(n||0))+(r[...
function Ze (line 2) | function Ze(e,t,n,r,i,o){var a="width"===t?1:0,s=0,u=0;if(n===(r?"border...
function et (line 2) | function et(e,t,n){var r=$e(e),i=Fe(e,t,r),o="border-box"===w.css(e,"box...
function tt (line 2) | function tt(e,t,n,r,i){return new tt.prototype.init(e,t,n,r,i)}
function at (line 2) | function at(){rt&&(!1===r.hidden&&e.requestAnimationFrame?e.requestAnima...
function st (line 2) | function st(){return e.setTimeout(function(){nt=void 0}),nt=Date.now()}
function ut (line 2) | function ut(e,t){var n,r=0,i={height:e};for(t=t?1:0;r<4;r+=2-t)i["margin...
function lt (line 2) | function lt(e,t,n){for(var r,i=(pt.tweeners[t]||[]).concat(pt.tweeners["...
function ct (line 2) | function ct(e,t,n){var r,i,o,a,s,u,l,c,f="width"in t||"height"in t,p=thi...
function ft (line 2) | function ft(e,t){var n,r,i,o,a;for(n in e)if(r=G(n),i=t[r],o=e[n],Array....
function pt (line 2) | function pt(e,t,n){var r,i,o=0,a=pt.prefilters.length,s=w.Deferred().alw...
function vt (line 2) | function vt(e){return(e.match(M)||[]).join(" ")}
function mt (line 2) | function mt(e){return e.getAttribute&&e.getAttribute("class")||""}
function xt (line 2) | function xt(e){return Array.isArray(e)?e:"string"==typeof e?e.match(M)||...
function jt (line 2) | function jt(e,t,n,r){var i;if(Array.isArray(t))w.each(t,function(t,i){n|...
function Ft (line 2) | function Ft(e){return function(t,n){"string"!=typeof t&&(n=t,t="*");var ...
function _t (line 2) | function _t(e,t,n,r){var i={},o=e===Wt;function a(s){var u;return i[s]=!...
function zt (line 2) | function zt(e,t){var n,r,i=w.ajaxSettings.flatOptions||{};for(n in t)voi...
function Xt (line 2) | function Xt(e,t,n){var r,i,o,a,s=e.contents,u=e.dataTypes;while("*"===u[...
function Ut (line 2) | function Ut(e,t,n,r){var i,o,a,s,u,l={},c=e.dataTypes.slice();if(c[1])fo...
function k (line 2) | function k(t,n,r,s){var l,p,d,b,T,C=n;c||(c=!0,u&&e.clearTimeout(u),i=vo...
FILE: web/scripts/js.cookie.js
function extend (line 27) | function extend () {
function init (line 39) | function init (converter) {
FILE: web/scripts/utils.js
function load_RLE (line 1) | function load_RLE(rle_obj, fillColor=[255, 255, 255], alpha=255) {
function uncompress_RLE (line 43) | function uncompress_RLE(rle_str) {
function hexToRgb (line 68) | function hexToRgb(hex) {
FILE: web/scripts/viewer.js
function save_settings (line 25) | function save_settings() {
function load_settings (line 29) | function load_settings() {
function is_object (line 77) | function is_object(val) { return val === Object(val); }
function fill_info (line 79) | function fill_info(info) {
function fill_controls (line 105) | function fill_controls() {
function render (line 169) | function render() {
FILE: web/server.py
class Handler (line 9) | class Handler(SimpleHTTPRequestHandler):
method do_GET (line 11) | def do_GET(self):
method send_str (line 21) | def send_str(self, string):
method send_file (line 30) | def send_file(self, path):
method send_response (line 49) | def send_response(self, code, message=None):
FILE: yolact.py
class Concat (line 34) | class Concat(nn.Module):
method __init__ (line 35) | def __init__(self, nets, extra_params):
method forward (line 41) | def forward(self, x):
class PredictionModule (line 47) | class PredictionModule(nn.Module):
method __init__ (line 73) | def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]]...
method forward (line 133) | def forward(self, x):
method make_priors (line 214) | def make_priors(self, conv_h, conv_w, device):
class FPN (line 265) | class FPN(ScriptModuleWrapper):
method __init__ (line 283) | def __init__(self, in_channels):
method forward (line 311) | def forward(self, convouts:List[torch.Tensor]):
class FastMaskIoUNet (line 363) | class FastMaskIoUNet(ScriptModuleWrapper):
method __init__ (line 365) | def __init__(self):
method forward (line 371) | def forward(self, x):
class Yolact (line 379) | class Yolact(nn.Module):
method __init__ (line 399) | def __init__(self):
method save_weights (line 473) | def save_weights(self, path):
method load_weights (line 477) | def load_weights(self, path):
method init_weights (line 492) | def init_weights(self, backbone_path):
method train (line 549) | def train(self, mode=True):
method freeze_bn (line 555) | def freeze_bn(self, enable=False):
method forward (line 564) | def forward(self, x):
"preview": "{\"info\": {\"Config\": {\"preserve_aspect_ratio\": false, \"use_prediction_module\": false, \"use_yolo_regressors\": false, \"use_"
},
{
"path": "web/index.html",
"chars": 803,
"preview": "<html lang=\"en\">\n<head>\n\t<meta charset=\"UTF-8\">\n\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n"
},
{
"path": "web/iou.html",
"chars": 1065,
"preview": "\n\n<html lang=\"en\">\n\n<head>\n\t<meta charset=\"UTF-8\">\n\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0"
},
{
"path": "web/scripts/index.js",
"chars": 421,
"preview": "$(document).ready(function() {\n\t// Load in det_index and fill the config list with the appropriate elements\n\t$.ajax({\n\t\t"
},
{
"path": "web/scripts/iou.js",
"chars": 12442,
"preview": "// IoU added by Daniel Bolya\n//\n// Last updated November 2010 by Simon Sarris\n// www.simonsarris.com\n// sarris@acm.org\n/"
},
{
"path": "web/scripts/jquery.js",
"chars": 86927,
"preview": "/*! jQuery v3.3.1 | (c) JS Foundation and other contributors | jquery.org/license */\n!function(e,t){\"use strict\";\"object"
},
{
"path": "web/scripts/js.cookie.js",
"chars": 3886,
"preview": "/*!\n * JavaScript Cookie v2.2.0\n * https://github.com/js-cookie/js-cookie\n *\n * Copyright 2006, 2015 Klaus Hartl & Fagne"
},
{
"path": "web/scripts/utils.js",
"chars": 1752,
"preview": "function load_RLE(rle_obj, fillColor=[255, 255, 255], alpha=255) {\n\tvar h = rle_obj.size[0], w = rle_obj.size[1];\n\tvar c"
},
{
"path": "web/scripts/viewer.js",
"chars": 6129,
"preview": "// Global variables so I remember them\nconfig_name = null;\nimg_idx = null;\n\nimg = null;\ndets = null;\nmasks = null;\n\n// M"
},
{
"path": "web/server.py",
"chars": 1593,
"preview": "from http.server import SimpleHTTPRequestHandler, HTTPServer, HTTPStatus\nfrom pathlib import Path\nimport os\n\nPORT = 6337"
},
{
"path": "web/viewer.html",
"chars": 1288,
"preview": "<html lang=\"en\">\n<head>\n\t<meta charset=\"UTF-8\">\n\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n"
},
{
"path": "yolact.py",
"chars": 31092,
"preview": "import torch, torchvision\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torchvision.models.resnet import Bo"
}
]
// ... and 1 more file (download for full content)