Repository: woodfrog/maptracker
Branch: main
Commit: ee8321fa5dac
Files: 161
Total size: 1.1 MB
Directory structure:
gitextract_kr8oe7q1/
├── .gitignore
├── LICENSE
├── LICENSE_GPL
├── README.md
├── docs/
│ ├── data_preparation.md
│ ├── getting_started.md
│ └── installation.md
├── plugin/
│ ├── __init__.py
│ ├── configs/
│ │ ├── _base_/
│ │ │ ├── datasets/
│ │ │ │ ├── coco_instance.py
│ │ │ │ ├── kitti-3d-3class.py
│ │ │ │ ├── kitti-3d-car.py
│ │ │ │ ├── lyft-3d.py
│ │ │ │ ├── nuim_instance.py
│ │ │ │ ├── nus-3d.py
│ │ │ │ ├── nus-mono3d.py
│ │ │ │ ├── range100_lyft-3d.py
│ │ │ │ ├── s3dis_seg-3d-13class.py
│ │ │ │ ├── scannet-3d-18class.py
│ │ │ │ ├── scannet_seg-3d-20class.py
│ │ │ │ ├── sunrgbd-3d-10class.py
│ │ │ │ ├── waymoD5-3d-3class.py
│ │ │ │ └── waymoD5-3d-car.py
│ │ │ ├── default_runtime.py
│ │ │ ├── models/
│ │ │ │ ├── 3dssd.py
│ │ │ │ ├── cascade_mask_rcnn_r50_fpn.py
│ │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py
│ │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py
│ │ │ │ ├── fcos3d.py
│ │ │ │ ├── groupfree3d.py
│ │ │ │ ├── h3dnet.py
│ │ │ │ ├── hv_pointpillars_fpn_lyft.py
│ │ │ │ ├── hv_pointpillars_fpn_nus.py
│ │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py
│ │ │ │ ├── hv_pointpillars_secfpn_kitti.py
│ │ │ │ ├── hv_pointpillars_secfpn_waymo.py
│ │ │ │ ├── hv_second_secfpn_kitti.py
│ │ │ │ ├── hv_second_secfpn_waymo.py
│ │ │ │ ├── imvotenet_image.py
│ │ │ │ ├── mask_rcnn_r50_fpn.py
│ │ │ │ ├── paconv_cuda_ssg.py
│ │ │ │ ├── paconv_ssg.py
│ │ │ │ ├── parta2.py
│ │ │ │ ├── pointnet2_msg.py
│ │ │ │ ├── pointnet2_ssg.py
│ │ │ │ └── votenet.py
│ │ │ └── schedules/
│ │ │ ├── cosine.py
│ │ │ ├── cyclic_20e.py
│ │ │ ├── cyclic_40e.py
│ │ │ ├── mmdet_schedule_1x.py
│ │ │ ├── schedule_2x.py
│ │ │ ├── schedule_3x.py
│ │ │ ├── seg_cosine_150e.py
│ │ │ ├── seg_cosine_200e.py
│ │ │ └── seg_cosine_50e.py
│ │ └── maptracker/
│ │ ├── av2_newsplit/
│ │ │ ├── maptracker_av2_100x50_newsplit_5frame_span10_stage1_bev_pretrain.py
│ │ │ ├── maptracker_av2_100x50_newsplit_5frame_span10_stage2_warmup.py
│ │ │ ├── maptracker_av2_100x50_newsplit_5frame_span10_stage3_joint_finetune.py
│ │ │ ├── maptracker_av2_newsplit_5frame_span10_stage1_bev_pretrain.py
│ │ │ ├── maptracker_av2_newsplit_5frame_span10_stage2_warmup.py
│ │ │ └── maptracker_av2_newsplit_5frame_span10_stage3_joint_finetune.py
│ │ ├── av2_oldsplit/
│ │ │ ├── maptracker_av2_oldsplit_5frame_span10_stage1_bev_pretrain.py
│ │ │ ├── maptracker_av2_oldsplit_5frame_span10_stage2_warmup.py
│ │ │ └── maptracker_av2_oldsplit_5frame_span10_stage3_joint_finetune.py
│ │ ├── nuscenes_newsplit/
│ │ │ ├── maptracker_nusc_newsplit_5frame_span10_stage1_bev_pretrain.py
│ │ │ ├── maptracker_nusc_newsplit_5frame_span10_stage2_warmup.py
│ │ │ └── maptracker_nusc_newsplit_5frame_span10_stage3_joint_finetune.py
│ │ └── nuscenes_oldsplit/
│ │ ├── maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain.py
│ │ ├── maptracker_nusc_oldsplit_5frame_span10_stage2_warmup.py
│ │ └── maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py
│ ├── core/
│ │ ├── apis/
│ │ │ ├── __init__.py
│ │ │ ├── mmdet_train.py
│ │ │ ├── test.py
│ │ │ └── train.py
│ │ └── evaluation/
│ │ ├── __init__.py
│ │ └── eval_hooks.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── argo_dataset.py
│ │ ├── base_dataset.py
│ │ ├── builder.py
│ │ ├── evaluation/
│ │ │ ├── AP.py
│ │ │ ├── __init__.py
│ │ │ ├── distance.py
│ │ │ ├── raster_eval.py
│ │ │ └── vector_eval.py
│ │ ├── map_utils/
│ │ │ ├── av2map_extractor.py
│ │ │ ├── nuscmap_extractor.py
│ │ │ └── utils.py
│ │ ├── nusc_dataset.py
│ │ ├── pipelines/
│ │ │ ├── __init__.py
│ │ │ ├── formating.py
│ │ │ ├── loading.py
│ │ │ ├── rasterize.py
│ │ │ ├── transform.py
│ │ │ └── vectorize.py
│ │ ├── samplers/
│ │ │ ├── __init__.py
│ │ │ ├── distributed_sampler.py
│ │ │ ├── group_sampler.py
│ │ │ └── sampler.py
│ │ └── visualize/
│ │ └── renderer.py
│ └── models/
│ ├── __init__.py
│ ├── assigner/
│ │ ├── __init__.py
│ │ ├── assigner.py
│ │ └── match_cost.py
│ ├── backbones/
│ │ ├── __init__.py
│ │ ├── bevformer/
│ │ │ ├── __init__.py
│ │ │ ├── custom_base_transformer_layer.py
│ │ │ ├── encoder.py
│ │ │ ├── grid_mask.py
│ │ │ ├── multi_scale_deformable_attn_function.py
│ │ │ ├── spatial_cross_attention.py
│ │ │ ├── temporal_net.py
│ │ │ ├── temporal_self_attention.py
│ │ │ └── transformer.py
│ │ └── bevformer_backbone.py
│ ├── heads/
│ │ ├── MapDetectorHead.py
│ │ ├── MapSegHead.py
│ │ ├── __init__.py
│ │ └── base_map_head.py
│ ├── losses/
│ │ ├── __init__.py
│ │ ├── detr_loss.py
│ │ └── seg_loss.py
│ ├── mapers/
│ │ ├── MapTracker.py
│ │ ├── __init__.py
│ │ ├── base_mapper.py
│ │ └── vector_memory.py
│ ├── necks/
│ │ ├── __init__.py
│ │ └── gru.py
│ ├── transformer_utils/
│ │ ├── CustomMSDeformableAttention.py
│ │ ├── MapTransformer.py
│ │ ├── __init__.py
│ │ ├── base_transformer.py
│ │ ├── deformable_transformer.py
│ │ └── fp16_dattn.py
│ └── utils/
│ ├── __init__.py
│ ├── query_update.py
│ └── renderer_track.py
├── requirements.txt
└── tools/
├── benchmark.py
├── data_converter/
│ ├── __init__.py
│ ├── argoverse_converter.py
│ ├── av2_train_split.txt
│ ├── av2_val_split.txt
│ ├── nusc_split.py
│ └── nuscenes_converter.py
├── dist_test.sh
├── dist_train.sh
├── mmdet_test.py
├── mmdet_train.py
├── slurm_test.sh
├── slurm_train.sh
├── test.py
├── tracking/
│ ├── calculate_cmap.py
│ ├── cmap_utils/
│ │ ├── __init__.py
│ │ ├── data_utils.py
│ │ ├── match_utils.py
│ │ └── utils.py
│ ├── prepare_gt_tracks.py
│ └── prepare_pred_tracks.py
├── train.py
└── visualization/
├── vis_global.py
└── vis_per_frame.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.ipynb
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# cython generated cpp
data
.vscode
.idea
# custom
*.pkl
*.gif
*.pkl.json
*.log.json
work_dirs/
work_dirs_bak/
debug_img/
model_file/
exps/
*~
mmdet3d/.mim
# Pytorch
*.pth
# demo
demo/
*.jpg
*.png
*.obj
*.ply
*.zip
*.tar
*.tar.gz
*.json
# datasets
/datasets
/data_ann
/datasets_local
# softlinks
av2
nuScenes
# viz
viz
viz_bak
*pkl*
demo
mmdetection3d
work_dirs
vis_global
vis_local
================================================
FILE: LICENSE
================================================
The code, data, and model weights in this repository are not allowed for commercial usage. For research purposes, the terms follow the GPLv3 as in the separate file "LICENSE_GPL".
-- Authors of the paper "MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping".
================================================
FILE: LICENSE_GPL
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: README.md
================================================
<div align="center">
<h2 align="center"> MapTracker: Tracking with Strided Memory Fusion for <br/> Consistent Vector HD Mapping </h2>
<h4 align="center"> ECCV 2024 (Oral) </h4>
[Jiacheng Chen*<sup>1</sup>](https://jcchen.me), [Yuefan Wu*<sup>1</sup>](https://ivenwu.com/), [Jiaqi Tan*<sup>1</sup>](https://www.linkedin.com/in/jiaqi-christina-tan-800697158/), [Hang Ma<sup>1</sup>](https://www.cs.sfu.ca/~hangma/), [Yasutaka Furukawa<sup>1,2</sup>](https://www2.cs.sfu.ca/~furukawa/)
<sup>1</sup> Simon Fraser University <sup>2</sup> Wayve
([arXiv](https://arxiv.org/abs/2403.15951), [Project page](https://map-tracker.github.io/))
</div>
https://github.com/woodfrog/maptracker/assets/13405255/1c0e072a-cb77-4000-b81b-5b9fd40f8f39
This repository provides the official implementation of the paper [MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping](https://arxiv.org/abs/2403.15951). MapTracker reconstructs temporally consistent vector HD maps, and the local maps can be progressively merged into a global reconstruction.
This repository is built upon [StreamMapNet](https://github.com/yuantianyuan01/StreamMapNet).
## Table of Contents
- [Introduction](#introduction)
- [Model Architecture](#model-architecture)
- [Installation](#installation)
- [Data preparation](#data-preparation)
- [Getting Started](#getting-started)
- [Acknowledgements](#acknowledgements)
- [Citation](#citation)
- [License](#license)
## Introduction
This paper presents a vector HD-mapping algorithm that formulates the mapping as a tracking task and uses a history of memory latents to ensure consistent reconstructions over time.
Our method, MapTracker, accumulates a sensor stream into memory buffers of two latent representations: 1) raster latents in the bird's-eye-view (BEV) space and 2) vector latents over the road elements (i.e., pedestrian crossings, lane dividers, and road boundaries). The approach borrows the query propagation paradigm from the tracking literature, explicitly associating tracked road elements from the previous frame to the current one, while fusing a subset of memory latents selected with distance strides to further enhance temporal consistency. A vector latent is decoded to reconstruct the geometry of a road element.
The paper further makes benchmark contributions by 1) improving the processing code for existing datasets to produce consistent ground truth with temporal alignments and 2) augmenting existing mAP metrics with consistency checks. MapTracker significantly outperforms existing methods on both the nuScenes and Argoverse2 datasets, by over 8% and 19% on the conventional and the new consistency-aware metrics, respectively.
## Model Architecture

(Top) The architecture of MapTracker, consisting of the BEV and VEC modules and their memory buffers. (Bottom) Close-up views of the BEV and vector fusion layers.
The **BEV Module** takes ConvNet features of the onboard perspective images, the BEV memory buffer $\{M_{\text{BEV}}(t-1), M_{\text{BEV}}(t-2), \dots\}$, and vehicle motions $\{P^t_{t-1}, P^t_{t-2}, \dots\}$ as input. It propagates the previous BEV memory $M_{\text{BEV}}(t-1)$ based on the vehicle motion to initialize $M_{\text{BEV}}(t)$. In the BEV Memory Fusion layer, $M_{\text{BEV}}(t)$ is integrated with selected history BEV memories $\{M_{\text{BEV}}^{*}(t'), t'\in \pi(t)\}$; the result is used for semantic segmentation and passed to the VEC Module.
The **VEC Module** propagates the previous vector latent memory $M_{\text{VEC}}(t-1)$ with a PropMLP to initialize the vector queries $M_{\text{VEC}}(t)$. In the Vector Memory Fusion layer, each propagated $M_{\text{VEC}}(t)$ is fused with its selected history vector memories $\{M_{\text{VEC}}^{*}(t'), t' \in \pi(t)\}$. The final vector latents are decoded to reconstruct the road elements.
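To make the strided fusion concrete, below is a minimal Python sketch of the two steps described above: selecting history frames by travelled distance, then mixing them with the propagated memory. The names (`select_strided_frames`, `MemoryFusion`), the stride value, and the simple linear mixing are illustrative assumptions, not the actual implementation (the real fusion layers live under `plugin/models/`).

```python
import torch

def select_strided_frames(travel_dists, num_select=4, stride=5.0):
    """Pick history frames whose travelled distance to the current frame
    is closest to multiples of `stride` meters (the "distance strides").
    travel_dists: 1-D tensor, travel_dists[i] = distance travelled between
    history frame i and the current frame."""
    selected = set()
    for k in range(1, num_select + 1):
        idx = int(torch.argmin((travel_dists - k * stride).abs()))
        selected.add(idx)
    return sorted(selected)

class MemoryFusion(torch.nn.Module):
    """Toy stand-in for a memory fusion layer: concatenate the propagated
    current memory with the selected history memories and mix linearly."""
    def __init__(self, dim, num_select=4):
        super().__init__()
        self.mix = torch.nn.Linear(dim * (num_select + 1), dim)

    def forward(self, cur_mem, history_mems):
        # cur_mem: (N, C); history_mems: list of num_select tensors (N, C),
        # each already warped/propagated into the current frame t.
        return self.mix(torch.cat([cur_mem] + history_mems, dim=-1))
```

With `stride=5.0` and `num_select=4`, the fusion layer would see memories recorded roughly 5, 10, 15, and 20 meters behind the current position, which is the intuition behind selecting with distance strides rather than fixed time steps.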
## Installation
Please refer to the [installation guide](docs/installation.md) to set up the environment.
## Data preparation
For how to download and prepare data for the nuScenes and Argoverse2 datasets, as well as downloading our checkpoints, please see the [data preparation guide](docs/data_preparation.md).
## Getting Started
For instructions on how to run training, inference, evaluation, and visualization, please follow [getting started guide](docs/getting_started.md).
## Acknowledgements
We are grateful to the open-source projects below; their great work made ours possible:
* BEV perception: [BEVFormer](https://github.com/fundamentalvision/BEVFormer) 
* Vector HD mapping: [StreamMapNet](https://github.com/yuantianyuan01/StreamMapNet) , [MapTR](https://github.com/hustvl/MapTR) 
## Citation
If you find MapTracker useful in your research or applications, please consider citing:
```
@inproceedings{chen2024maptracker,
  author    = {Chen, Jiacheng and Wu, Yuefan and Tan, Jiaqi and Ma, Hang and Furukawa, Yasutaka},
  title     = {MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2024}
}
```
## License
This project is licensed under the GPLv3 for research purposes; commercial usage is not allowed. See the [license file](LICENSE) for details.
================================================
FILE: docs/data_preparation.md
================================================
# Data Preparation
Compared to the data preparation procedure of StreamMapNet or MapTR, we have one extra step that generates the ground-truth tracking information (Step 3).
We noticed that the track generation results can be slightly different across machines (potentially because Shapely behaves slightly differently on different machines), **so please always run Step 3 below on the training machine to generate the gt tracking information**.
## nuScenes
**Step 1.** Download [nuScenes](https://www.nuscenes.org/download) dataset to `./datasets/nuscenes`.
**Step 2.** Generate annotation files for the nuScenes dataset (the same as StreamMapNet):
```
python tools/data_converter/nuscenes_converter.py --data-root ./datasets/nuscenes
```
Add ``--newsplit`` to generate the metadata for the new (geography-based) split provided by StreamMapNet.
**Step 3.** Generate the tracking ground truth by
```
python tools/tracking/prepare_gt_tracks.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py --out-dir tracking_gts/nuscenes --visualize
```
Add the ``--visualize`` flag to visualize the data with element IDs derived from our track generation process, or remove it to save disk space.
To generate the G.T. tracks for the new split, change the config file accordingly.
## Argoverse2
**Step 1.** Download [Argoverse2 (sensor)](https://argoverse.github.io/user-guide/getting_started.html#download-the-datasets) dataset to `./datasets/av2`.
**Step 2.** Generate annotation files for the Argoverse2 dataset:
```
python tools/data_converter/argoverse_converter.py --data-root ./datasets/av2
```
**Step 3.** Generate the tracking ground truth by
```
python tools/tracking/prepare_gt_tracks.py plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage3_joint_finetune.py --out-dir tracking_gts/av2 --visualize
```
## Checkpoints
We provide the checkpoints at [this Dropbox link](https://www.dropbox.com/scl/fo/miulg8q9oby7q2x5vemme/ALoxX1HyxGlfR9y3xlqfzeE?rlkey=i3rw4mbq7lacblc7xsnjkik1u&dl=0) or [this HuggingFace repo](https://huggingface.co/cccjc/maptracker/tree/main). Please download them and place them under ``./work_dirs/pretrained_ckpts``.
## File structures
Make sure the final file structure looks like the following:
```
maptracker
├── mmdetection3d
├── tools
├── plugin
│ ├── configs
│ ├── models
│ ├── datasets
│ ├── ...
├── work_dirs
│ ├── pretrained_ckpts
│ │ ├── maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune
│ │ │ ├── latest.pth
│ │ ├── ...
│ ├── ....
├── datasets
│ ├── nuscenes
│ │ ├── maps <-- used
│ │ ├── samples <-- key frames
│ │ ├── v1.0-test <-- metadata
│ │ ├── v1.0-trainval <-- metadata and annotations
│ │ ├── nuscenes_map_infos_train_{newsplit}.pkl <-- train annotations
│ │ ├── nuscenes_map_infos_train_{newsplit}_gt_tracks.pkl <-- train gt tracks
│ │ ├── nuscenes_map_infos_val_{newsplit}.pkl <-- val annotations
│ │ ├── nuscenes_map_infos_val_{newsplit}_gt_tracks.pkl <-- val gt tracks
│ ├── av2
│ │ ├── train
│ │ ├── val
│ │ ├── test
│ │ ├── maptrv2_val_samples_info.pkl <-- maptr's av2 metadata, used to align the val set
│ │ ├── av2_map_infos_train_{newsplit}.pkl <-- train annotations
│ │ ├── av2_map_infos_train_{newsplit}_gt_tracks.pkl <-- train gt tracks
│ │ ├── av2_map_infos_val_{newsplit}.pkl <-- val annotations
│ │ ├── av2_map_infos_val_{newsplit}_gt_tracks.pkl <-- val gt tracks
```
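As a quick sanity check before training, a short script like the one below (not part of the repo; it assumes the old-split file naming, i.e., without the `newsplit` tag shown in the tree above) can verify that the nuScenes annotation and gt-track files are in place:

```python
from pathlib import Path

# Hypothetical helper: checks the old-split nuScenes files listed above.
# The new split inserts a `newsplit` tag into these file names.
required = [
    "datasets/nuscenes/nuscenes_map_infos_train.pkl",
    "datasets/nuscenes/nuscenes_map_infos_train_gt_tracks.pkl",
    "datasets/nuscenes/nuscenes_map_infos_val.pkl",
    "datasets/nuscenes/nuscenes_map_infos_val_gt_tracks.pkl",
]
missing = [p for p in required if not Path(p).exists()]
print("All annotation files found." if not missing else f"Missing: {missing}")
```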
================================================
FILE: docs/getting_started.md
================================================
# Getting started with MapTracker
In this document, we provide the commands for running inference/evaluation, training, and visualization.
## Inference and evaluation
### Inference and evaluation with Chamfer-based mAP
Use the following command to run inference and evaluation with the pretrained checkpoints, assuming 8 GPUs:
```
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash tools/dist_test.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py work_dirs/pretrained_ckpts/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/latest.pth 8 --eval --eval-options save_semantic=True
```
Pass ``--eval-options save_semantic=True`` to also save the semantic segmentation results of the BEV module.
### Evaluate with C-mAP
Generate prediction matching by
```
python tools/tracking/prepare_pred_tracks.py ${CONFIG} --result_path ${SUBMISSION_FILE} --cons_frames ${COMEBACK_FRAMES}
```
Evaluate with C-mAP by
```
python tools/tracking/calculate_cmap.py ${CONFIG} --result_path ${PRED_MATCHING_INFO}
```
An example evaluation:
```
python tools/tracking/calculate_cmap.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py --result_path ./work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl
```
### Results
By running with the checkpoints provided in the [data preparation guide](data_preparation.md), the expected results are:
| Dataset | Split | Divider | Crossing | Boundary | mAP | C-mAP |
|:------------------------------------------------------------------------:|:--------:|:-------:|:--------:|:--------:|:---------:|:-------------------------------------------------------------------------------------------:|
| nuScenes | old | 74.14 | 80.04 | 74.06 | 76.08 | 69.13 |
| nuScenes | new | 30.10 | 45.86 | 45.06 | 40.34 | 32.50 |
| Argoverse2 | old | 76.99 | 79.97 | 73.66 | 76.87 | 68.35 |
| Argoverse2 | new | 75.11 | 69.96 | 68.95 | 71.34 | 63.11 |
## Training
The training consists of three stages as detailed in the paper. We train the models on 8 Nvidia RTX A5000 GPUs.
**Stage 1**: BEV pretraining with semantic segmentation losses:
```
bash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain.py 8
```
**Stage 2**: Vector module warmup with a large batch size while freezing the BEV module:
```
bash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage2_warmup.py 8
```
Set ``load_from=...`` in the config file so that stage 2 starts from the stage-1 checkpoint, as sketched below.
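A minimal sketch of the relevant config line (the exact path is an assumption based on the default `work_dirs` naming; point it at your actual stage-1 output):

```python
# In the stage-2 config file; path assumed from the default work_dirs
# naming -- adjust it to your own stage-1 run.
load_from = 'work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain/latest.pth'
```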
**Stage 3**: Joint finetuning:
```
bash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py 8
```
Similarly, set ``load_from=...`` in the config file to load the checkpoint from stage 2.
## Visualization
### Global merged reconstruction (merged from local HD maps)
```bash
python tools/visualization/vis_global.py [path to method configuration file under plugin/configs] \
--data_path [path to the .pkl file] \
--out_dir [path to the output folder] \
--option [vis-pred / vis-gt: visualize predicted vectors / visualize ground truth vectors] \
--per_frame_result 1
```
Set ``--per_frame_result`` to 1 to generate the per-frame video (this visualization is a bit slow), or to 0 to only produce the final merged global reconstruction.
Examples:
```bash
# Visualize MapTracker's prediction
python tools/visualization/vis_global.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \
--data_path work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl \
--out_dir vis_global/nuscenes_old/maptracker \
--option vis-pred --per_frame_result 1
# Visualize ground truth data
python tools/visualization/vis_global.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \
--data_path datasets/nuscenes/nuscenes_map_infos_val_gt_tracks.pkl \
--out_dir vis_global/nuscenes_old/gt \
--option vis-gt --per_frame_result 0
```
### Local HD map reconstruction
```bash
python tools/visualization/vis_per_frame.py [path to method configuration file under plugin/configs] \
--data_path [path to the .pkl file] \
--out_dir [path to the data folder] \
--option [vis-pred / vis-gt: visualize predicted vectors / visualize ground truth vectors and input video streams]
```
Note that the input perspective-view videos will be saved when generating the ground truth visualization.
Examples:
```bash
# Visualize MapTracker's prediction
python tools/visualization/vis_per_frame.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \
--data_path work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl \
--out_dir vis_local/nuscenes_old/maptracker \
--option vis-pred
# Visualize ground truth data
python tools/visualization/vis_per_frame.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \
--data_path datasets/nuscenes/nuscenes_map_infos_val_gt_tracks.pkl \
--out_dir vis_local/nuscenes_old/gt \
--option vis-gt
```
================================================
FILE: docs/installation.md
================================================
# Environment Setup
We use the same environment as StreamMapNet; the setup below is largely borrowed from their repo.
**Step 1.** Create conda environment and activate:
```
conda create --name maptracker python=3.8 -y
conda activate maptracker
```
**Step 2.** Install PyTorch.
```
pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
```
**Step 3.** Install the MMCV-series packages (mmcv-full, mmdet, mmsegmentation, mmdetection3d).
```
# Install mmcv-series
pip install mmcv-full==1.6.0
pip install mmdet==2.28.2
pip install mmsegmentation==0.30.0
git clone https://github.com/open-mmlab/mmdetection3d.git
cd mmdetection3d
git checkout v1.0.0rc6
pip install -e .
```
**Step 4.** Install other requirements.
```
pip install -r requirements.txt
```
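Optionally, run a quick sanity check that the pinned versions are the ones actually importable; the expected values in the comments are simply the versions pinned in Steps 2-3 above:
```python
# Environment sanity check -- run inside the activated conda environment.
import torch
import mmcv
import mmdet
import mmseg
import mmdet3d

print(torch.__version__)          # expect 1.9.0+cu111
print(torch.cuda.is_available())  # expect True on a CUDA 11.1 machine
print(mmcv.__version__)           # expect 1.6.0
print(mmdet.__version__)          # expect 2.28.2
print(mmseg.__version__)          # expect 0.30.0
print(mmdet3d.__version__)        # expect 1.0.0rc6
```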
================================================
FILE: plugin/__init__.py
================================================
from .models import *
from .datasets import *
================================================
FILE: plugin/configs/_base_/datasets/coco_instance.py
================================================
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
================================================
FILE: plugin/configs/_base_/datasets/kitti-3d-3class.py
================================================
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=1, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/kitti-3d-car.py
================================================
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=class_names,
sample_groups=dict(Car=15))
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=1, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/lyft-3d.py
================================================
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-80, -80, -5, 80, 80, 3]
# For Lyft we usually do 9-class detection
class_names = [
'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/lyft/': 's3://lyft/lyft/',
# 'data/lyft/': 's3://lyft/lyft/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_test.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True))
# For the Lyft dataset, we usually evaluate the model at the end of training.
# Since the models are trained for 24 epochs by default, we set the evaluation
# interval to 24. Please change the interval accordingly if you do not
# use the default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/nuim_instance.py
================================================
dataset_type = 'CocoDataset'
data_root = 'data/nuimages/'
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
img_prefix=data_root,
classes=class_names,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
================================================
FILE: plugin/configs/_base_/datasets/nus-3d.py
================================================
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-50, -50, -5, 50, 50, 3]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True,
box_type_3d='LiDAR'))
# For the nuScenes dataset, we usually evaluate the model at the end of
# training. Since the models are trained for 24 epochs by default, we set the
# evaluation interval to 24. Please change the interval accordingly if you do
# not use the default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/nus-mono3d.py
================================================
dataset_type = 'NuScenesMonoDataset'
data_root = 'data/nuscenes/'
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False)
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=True,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img']),
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
box_type_3d='Camera'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'))
evaluation = dict(interval=2)
================================================
FILE: plugin/configs/_base_/datasets/range100_lyft-3d.py
================================================
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# For Lyft we usually do 9-class detection
class_names = [
'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/lyft/': 's3://lyft/lyft/',
# 'data/lyft/': 's3://lyft/lyft/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_test.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True))
# For the Lyft dataset, we usually evaluate the model at the end of training.
# Since the models are trained for 24 epochs by default, we set the evaluation
# interval to 24. Please change the interval accordingly if you do not
# use the default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/s3dis_seg-3d-13class.py
================================================
# dataset settings
dataset_type = 'S3DISSegDataset'
data_root = './data/s3dis/'
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
num_points = 4096
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
block_size=1.0,
ignore_index=len(class_names),
use_normalized_coord=True,
enlarge_size=0.2,
min_unique_num=None),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='NormalizePointsColor', color_mean=None),
dict(
# a wrapper needed to successfully call the test function;
# no test-time augmentation is actually performed
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.0,
flip_ratio_bev_vertical=0.0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
# train on area 1, 2, 3, 4, 6
# test on area 5
train=dict(
type=dataset_type,
data_root=data_root,
ann_files=[
data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area
],
pipeline=train_pipeline,
classes=class_names,
test_mode=False,
ignore_index=len(class_names),
scene_idxs=[
data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'
for i in train_area
]),
val=dict(
type=dataset_type,
data_root=data_root,
ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names),
scene_idxs=data_root +
f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/scannet-3d-18class.py
================================================
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
36, 39),
max_cat_id=40),
dict(type='IndoorPointSample', num_points=40000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='IndoorPointSample', num_points=40000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/scannet_seg-3d-20class.py
================================================
# dataset settings
dataset_type = 'ScanNetSegDataset'
data_root = './data/scannet/'
class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
'bathtub', 'otherfurniture')
num_points = 8192
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
block_size=1.5,
ignore_index=len(class_names),
use_normalized_coord=False,
enlarge_size=0.2,
min_unique_num=None),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='NormalizePointsColor', color_mean=None),
dict(
# a wrapper needed to successfully call the test function;
# no test-time augmentation is actually performed
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.0,
flip_ratio_bev_vertical=0.0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
test_mode=False,
ignore_index=len(class_names),
scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names)),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/sunrgbd-3d-10class.py
================================================
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadAnnotations3D'),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='IndoorPointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(type='IndoorPointSample', num_points=20000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
filter_empty_gt=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/waymoD5-3d-3class.py
================================================
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car', 'Pedestrian', 'Cyclist']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=24, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/datasets/waymoD5-3d-car.py
================================================
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=class_names,
sample_groups=dict(Car=15),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=24, pipeline=eval_pipeline)
================================================
FILE: plugin/configs/_base_/default_runtime.py
================================================
checkpoint_config = dict(interval=1)
# yapf:disable
# By default we use textlogger hook and tensorboard
# For more loggers see
# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
load_from = None  # set in derived configs to load a checkpoint, e.g. the previous stage's (see docs/getting_started.md)
resume_from = None
workflow = [('train', 1)]
================================================
FILE: plugin/configs/_base_/models/3dssd.py
================================================
model = dict(
type='SSD3DNet',
backbone=dict(
type='PointNet2SAMSG',
in_channels=4,
num_points=(4096, 512, (256, 256)),
radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
((64, 64, 128), (64, 64, 128), (64, 96, 128)),
((128, 128, 256), (128, 192, 256), (128, 256, 256))),
aggregation_channels=(64, 128, 256),
fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
fps_sample_range_lists=((-1), (-1), (512, -1)),
norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
sa_cfg=dict(
type='PointSAModuleMSG',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
bbox_head=dict(
type='SSD3DHead',
in_channels=256,
vote_module_cfg=dict(
in_channels=256,
num_points=256,
gt_per_seed=1,
conv_channels=(128, ),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
with_res_feat=False,
vote_xyz_range=(3.0, 3.0, 2.0)),
vote_aggregation_cfg=dict(
type='PointSAModuleMSG',
num_point=256,
radii=(4.8, 6.4),
sample_nums=(16, 32),
mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
use_xyz=True,
normalize_xyz=False,
bias=True),
pred_layer_cfg=dict(
in_channels=1536,
shared_conv_channels=(512, 128),
cls_conv_channels=(128, ),
reg_conv_channels=(128, ),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
objectness_loss=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
corner_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
test_cfg=dict(
nms_cfg=dict(type='nms', iou_thr=0.1),
sample_mod='spec',
score_thr=0.0,
per_class_proposal=True,
max_output_num=100))
================================================
FILE: plugin/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py
================================================
# model settings
model = dict(
type='CascadeRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
roi_head=dict(
type='CascadeRoIHead',
num_stages=3,
stage_loss_weights=[1, 0.5, 0.25],
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
loss_weight=1.0)),
dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067]),
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
]),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
================================================
FILE: plugin/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py
================================================
voxel_size = [0.1, 0.1, 0.2]
model = dict(
type='CenterPoint',
pts_voxel_layer=dict(
max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=5,
sparse_shape=[41, 1024, 1024],
output_channels=128,
order=('conv', 'norm', 'act'),
encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
128)),
encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
block_type='basicblock'),
pts_backbone=dict(
type='SECOND',
in_channels=256,
out_channels=[128, 256],
layer_nums=[5, 5],
layer_strides=[1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
conv_cfg=dict(type='Conv2d', bias=False)),
pts_neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
out_channels=[256, 256],
upsample_strides=[1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
upsample_cfg=dict(type='deconv', bias=False),
use_conv_for_no_stride=True),
pts_bbox_head=dict(
type='CenterHead',
in_channels=sum([256, 256]),
tasks=[
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
],
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
out_size_factor=8,
voxel_size=voxel_size[:2],
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
pts=dict(
grid_size=[1024, 1024, 40],
voxel_size=voxel_size,
out_size_factor=8,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
test_cfg=dict(
pts=dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
score_threshold=0.1,
out_size_factor=8,
voxel_size=voxel_size[:2],
nms_type='rotate',
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2)))
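# A small consistency sketch (illustrative, not from the upstream file):
# the CenterPoint head predicts on a BEV map downsampled by out_size_factor
# relative to the voxel grid, i.e. 1024 / 8 = 128 cells per axis here.
_bev_map_cells = 1024 // 8  # train_cfg grid_size[0] // out_size_factor = 128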
================================================
FILE: plugin/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py
================================================
voxel_size = [0.2, 0.2, 8]
model = dict(
type='CenterPoint',
pts_voxel_layer=dict(
max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='PillarFeatureNet',
in_channels=5,
feat_channels=[64],
with_distance=False,
voxel_size=(0.2, 0.2, 8),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
legacy=False),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
pts_backbone=dict(
type='SECOND',
in_channels=64,
out_channels=[64, 128, 256],
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
conv_cfg=dict(type='Conv2d', bias=False)),
pts_neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
out_channels=[128, 128, 128],
upsample_strides=[0.5, 1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
upsample_cfg=dict(type='deconv', bias=False),
use_conv_for_no_stride=True),
pts_bbox_head=dict(
type='CenterHead',
in_channels=sum([128, 128, 128]),
tasks=[
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
],
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
out_size_factor=4,
voxel_size=voxel_size[:2],
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
pts=dict(
grid_size=[512, 512, 1],
voxel_size=voxel_size,
out_size_factor=4,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
test_cfg=dict(
pts=dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=4,
voxel_size=voxel_size[:2],
nms_type='rotate',
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2)))
================================================
FILE: plugin/configs/_base_/models/fcos3d.py
================================================
model = dict(
type='FCOSMono3D',
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='FCOSMono3DHead',
num_classes=10,
in_channels=256,
stacked_convs=2,
feat_channels=256,
use_direction_classifier=True,
diff_rad_by_sin=True,
pred_attrs=True,
pred_velo=True,
dir_offset=0.7854, # pi/4
strides=[8, 16, 32, 64, 128],
group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo
cls_branch=(256, ),
reg_branch=(
(256, ), # offset
(256, ), # depth
(256, ), # size
(256, ), # rot
() # velo
),
dir_branch=(256, ),
attr_branch=(256, ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_attr=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
norm_on_bbox=True,
centerness_on_reg=True,
center_sampling=True,
conv_bias=True,
dcn_on_last_conv=True),
train_cfg=dict(
allowed_border=0,
code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.8,
score_thr=0.05,
min_bbox_size=0,
max_per_img=200))
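# A small consistency sketch (illustrative, not from the upstream file):
# the per-group regression dims above sum to the length of
# train_cfg.code_weight:
_num_reg_targets = 2 + 1 + 3 + 1 + 2  # offset + depth + size + rot + velo = 9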
================================================
FILE: plugin/configs/_base_/models/groupfree3d.py
================================================
model = dict(
type='GroupFree3DNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=3,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 288)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True)),
bbox_head=dict(
type='GroupFree3DHead',
in_channels=288,
num_decoder_layers=6,
num_proposal=256,
transformerlayers=dict(
type='BaseTransformerLayer',
attn_cfgs=dict(
type='GroupFree3DMHA',
embed_dims=288,
num_heads=8,
attn_drop=0.1,
dropout_layer=dict(type='Dropout', drop_prob=0.1)),
ffn_cfgs=dict(
embed_dims=288,
feedforward_channels=2048,
ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True)),
operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
'norm')),
pred_layer_cfg=dict(
in_channels=288, shared_conv_channels=(288, 288), bias=True),
sampling_objectness_loss=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(sample_mod='kps'),
test_cfg=dict(
sample_mod='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
prediction_stages='last'))
================================================
FILE: plugin/configs/_base_/models/h3dnet.py
================================================
primitive_z_cfg = dict(
type='PrimitiveHead',
num_dims=2,
num_classes=18,
primitive_mode='z',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
primitive_xy_cfg = dict(
type='PrimitiveHead',
num_dims=1,
num_classes=18,
primitive_mode='xy',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
primitive_line_cfg = dict(
type='PrimitiveHead',
num_dims=0,
num_classes=18,
primitive_mode='line',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
model = dict(
type='H3DNet',
backbone=dict(
type='MultiBackbone',
num_streams=4,
suffixes=['net0', 'net1', 'net2', 'net3'],
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
act_cfg=dict(type='ReLU'),
backbones=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True))),
rpn_head=dict(
type='VoteHead',
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
roi_head=dict(
type='H3DRoIHead',
primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],
bbox_head=dict(
type='H3DBboxHead',
gt_per_seed=3,
num_proposal=256,
suface_matching_cfg=dict(
type='PointSAModule',
num_point=256 * 6,
radius=0.5,
num_sample=32,
mlp_channels=[128 + 6, 128, 64, 32],
use_xyz=True,
normalize_xyz=True),
line_matching_cfg=dict(
type='PointSAModule',
num_point=256 * 12,
radius=0.5,
num_sample=32,
mlp_channels=[128 + 12, 128, 64, 32],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
primitive_refine_channels=[128, 128, 128],
upper_thresh=100.0,
surface_thresh=0.5,
line_thresh=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
cues_objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.3, 0.7],
reduction='mean',
loss_weight=5.0),
cues_semantic_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.3, 0.7],
reduction='mean',
loss_weight=5.0),
proposal_objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='none',
loss_weight=5.0),
primitive_center_loss=dict(
type='MSELoss', reduction='none', loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
rpn_proposal=dict(use_nms=False),
rcnn=dict(
pos_distance_thr=0.3,
neg_distance_thr=0.6,
sample_mod='vote',
far_threshold=0.6,
near_threshold=0.3,
mask_surface_threshold=0.3,
label_surface_threshold=0.3,
mask_line_threshold=0.3,
label_line_threshold=0.3)),
test_cfg=dict(
rpn=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True,
use_nms=False),
rcnn=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True)))
================================================
FILE: plugin/configs/_base_/models/hv_pointpillars_fpn_lyft.py
================================================
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-80, -80, -5, 80, 80, 3],
max_voxels=(60000, 60000)),
pts_voxel_encoder=dict(
feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),
pts_middle_encoder=dict(output_shape=[640, 640]),
pts_bbox_head=dict(
num_classes=9,
anchor_generator=dict(
ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
# model training settings (based on nuScenes model settings)
train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
================================================
FILE: plugin/configs/_base_/models/hv_pointpillars_fpn_nus.py
================================================
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.25, 0.25, 8]
model = dict(
type='MVXFasterRCNN',
pts_voxel_layer=dict(
max_num_points=64,
point_cloud_range=[-50, -50, -5, 50, 50, 3],
voxel_size=voxel_size,
max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=[-50, -50, -5, 50, 50, 3],
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='FPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
act_cfg=dict(type='ReLU'),
in_channels=[64, 128, 256],
out_channels=256,
start_level=0,
num_outs=3),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
scales=[1, 2, 4],
sizes=[
[0.8660, 2.5981, 1.], # 1.5/sqrt(3)
[0.5774, 1.7321, 1.], # 1/sqrt(3)
[1., 1., 1.],
[0.4, 0.4, 1],
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500)))
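# A minimal sanity check (illustrative sketch, not from the upstream file):
# the BEV grid shape expected by pts_middle_encoder.output_shape follows
# directly from the point cloud range and voxel size used above:
_pc_range = [-50, -50, -5, 50, 50, 3]
_grid_x = round((_pc_range[3] - _pc_range[0]) / voxel_size[0])  # (50 - -50) / 0.25 = 400
_grid_y = round((_pc_range[4] - _pc_range[1]) / voxel_size[1])  # 400 -> output_shape=[400, 400]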
================================================
FILE: plugin/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py
================================================
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-100, -100, -5, 100, 100, 3],
max_voxels=(60000, 60000)),
pts_voxel_encoder=dict(
feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),
pts_middle_encoder=dict(output_shape=[800, 800]),
pts_bbox_head=dict(
num_classes=9,
anchor_generator=dict(
ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
# model training settings (based on nuScenes model settings)
train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
================================================
FILE: plugin/configs/_base_/models/hv_pointpillars_secfpn_kitti.py
================================================
voxel_size = [0.16, 0.16, 4]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=32, # max_points_per_voxel
point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(
type='PillarFeatureNet',
in_channels=4,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -39.68, -0.6, 70.4, 39.68, -0.6],
[0, -39.68, -0.6, 70.4, 39.68, -0.6],
[0, -39.68, -1.78, 70.4, 39.68, -1.78],
],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
================================================
FILE: plugin/configs/_base_/models/hv_pointpillars_secfpn_waymo.py
================================================
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.32, 0.32, 6]
model = dict(
type='MVXFasterRCNN',
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
voxel_size=voxel_size,
max_voxels=(32000, 32000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=5,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[1, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
[-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
[-74.88, -74.88, 0, 74.88, 74.88, 0]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 1.81, 1.77], # cyclist
[0.84, 0.91, 1.74] # pedestrian
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500)))
================================================
FILE: plugin/configs/_base_/models/hv_second_secfpn_kitti.py
================================================
voxel_size = [0.05, 0.05, 0.1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5,
point_cloud_range=[0, -40, -3, 70.4, 40, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000)),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
================================================
FILE: plugin/configs/_base_/models/hv_second_secfpn_waymo.py
================================================
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.08, 0.08, 0.1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=10,
point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
voxel_size=voxel_size,
max_voxels=(80000, 90000)),
voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
middle_encoder=dict(
type='SparseEncoder',
in_channels=5,
sparse_shape=[61, 1280, 1920],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=384,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
[-76.8, -51.2, 0, 76.8, 51.2, 0],
[-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 0.91, 1.74], # pedestrian
[0.84, 1.81, 1.77] # cyclist
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1)
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500))
================================================
FILE: plugin/configs/_base_/models/imvotenet_image.py
================================================
model = dict(
type='ImVoteNet',
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
img_rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
img_roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
img_rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
img_rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
img_rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
img_rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
img_rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
================================================
FILE: plugin/configs/_base_/models/mask_rcnn_r50_fpn.py
================================================
# model settings
model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
================================================
FILE: plugin/configs/_base_/models/paconv_cuda_ssg.py
================================================
_base_ = './paconv_ssg.py'
model = dict(
backbone=dict(
sa_cfg=dict(
type='PAConvCUDASAModule',
scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
================================================
FILE: plugin/configs/_base_/models/paconv_ssg.py
================================================
# model settings
model = dict(
type='EncoderDecoder3D',
backbone=dict(
type='PointNet2SASSG',
in_channels=9, # [xyz, rgb, normalized_xyz]
num_points=(1024, 256, 64, 16),
radius=(None, None, None, None), # use kNN instead of ball query
num_samples=(32, 32, 32, 32),
sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
512)),
fp_channels=(),
norm_cfg=dict(type='BN2d', momentum=0.1),
sa_cfg=dict(
type='PAConvSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=False,
paconv_num_kernels=[16, 16, 16],
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[16, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False))),
decode_head=dict(
type='PAConvHead',
# PAConv model's decoder takes skip connections from the backbone;
# unlike PointNet++, it also concats input features in the last
# level of the decoder, leading to `128 + 6` as the channel number
fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
(128 + 6, 128, 128, 128)),
channels=128,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None, # should be modified with dataset
loss_weight=1.0)),
# correlation loss to regularize PAConv's kernel weights
loss_regularization=dict(
type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide'))
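# A short note on the `128 + 6` above (an illustrative reading, based on the
# in_channels comment): of the 9 input channels, the 3 raw xyz coordinates
# are carried separately, so the remaining 6 feature channels
# (rgb + normalized_xyz) are what get concatenated in the last decoder level.
_concat_feat_channels = 9 - 3  # in_channels minus raw xyz -> 6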
================================================
FILE: plugin/configs/_base_/models/parta2.py
================================================
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=3,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=3,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1)
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
max_num=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
rcnn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25)),
test_cfg=dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
max_num=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True,
use_raw_score=True,
nms_thr=0.01,
score_thr=0.1)))
================================================
FILE: plugin/configs/_base_/models/pointnet2_msg.py
================================================
_base_ = './pointnet2_ssg.py'
# model settings
model = dict(
backbone=dict(
_delete_=True,
type='PointNet2SAMSG',
in_channels=6, # [xyz, rgb], should be modified with dataset
num_points=(1024, 256, 64, 16),
radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
128)),
((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
(256, 384, 512))),
aggregation_channels=(None, None, None, None),
fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
fps_sample_range_lists=((-1), (-1), (-1), (-1)),
dilated_group=(False, False, False, False),
out_indices=(0, 1, 2, 3),
sa_cfg=dict(
type='PointSAModuleMSG',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
decode_head=dict(
fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
(128, 128, 128, 128))))
================================================
FILE: plugin/configs/_base_/models/pointnet2_ssg.py
================================================
# model settings
model = dict(
type='EncoderDecoder3D',
backbone=dict(
type='PointNet2SASSG',
in_channels=6, # [xyz, rgb], should be modified with dataset
num_points=(1024, 256, 64, 16),
radius=(0.1, 0.2, 0.4, 0.8),
num_samples=(32, 32, 32, 32),
sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
512)),
fp_channels=(),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
decode_head=dict(
type='PointNet2Head',
fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
(128, 128, 128, 128)),
channels=128,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None, # should be modified with dataset
loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide'))
================================================
FILE: plugin/configs/_base_/models/votenet.py
================================================
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True)),
bbox_head=dict(
type='VoteHead',
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
test_cfg=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True))
================================================
FILE: plugin/configs/_base_/schedules/cosine.py
================================================
# This schedule is mainly used by models with dynamic voxelization
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
min_lr_ratio=1e-5)
momentum_config = None
runner = dict(type='EpochBasedRunner', max_epochs=40)
================================================
FILE: plugin/configs/_base_/schedules/cyclic_20e.py
================================================
# For the nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained for 24 epochs by default, we set the evaluation
# interval to 20. Please change the interval accordingly if you do not
# use the default schedule.
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
================================================
FILE: plugin/configs/_base_/schedules/cyclic_40e.py
================================================
# This schedule is usually used by models trained on the KITTI dataset.
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4.
lr = 0.0018
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
# Although max_epochs is 40, this schedule is usually used with
# RepeatDataset with a repeat ratio of N, so the actual number of
# epochs could be N x 40.
runner = dict(type='EpochBasedRunner', max_epochs=40)
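# A minimal sketch of the arithmetic in the comment above (illustrative):
_peak_lr = lr * 10     # ramps from 0.0018 up to 0.018 (target_ratio[0])
_final_lr = lr * 1e-4  # then anneals down to 1.8e-7 (target_ratio[1])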
================================================
FILE: plugin/configs/_base_/schedules/mmdet_schedule_1x.py
================================================
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
================================================
FILE: plugin/configs/_base_/schedules/schedule_2x.py
================================================
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
================================================
FILE: plugin/configs/_base_/schedules/schedule_3x.py
================================================
# optimizer
# This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008 # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
================================================
FILE: plugin/configs/_base_/schedules/seg_cosine_150e.py
================================================
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150)
================================================
FILE: plugin/configs/_base_/schedules/seg_cosine_200e.py
================================================
# optimizer
# This schedule is mainly used on ScanNet dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
================================================
FILE: plugin/configs/_base_/schedules/seg_cosine_50e.py
================================================
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50)
================================================
FILE: plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage1_bev_pretrain.py
================================================
_base_ = [
'../../_base_/default_runtime.py'
]
# model type
type = 'Mapper'
plugin = True
# plugin code dir
plugin_dir = 'plugin/'
# img configs
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
img_h = 608
img_w = 608
img_size = (img_h, img_w)
num_cams = 7
num_gpus = 8
batch_size = 1
num_iters_per_epoch = 29293 // (num_gpus * batch_size)
num_epochs = 12
num_epochs_interval = num_epochs // 6
total_iters = num_epochs * num_iters_per_epoch
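# With the values above (num_gpus=8, batch_size=1), this works out to
# 29293 // 8 = 3661 iterations per epoch and 12 * 3661 = 43932 total
# iterations (illustrative arithmetic).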
num_queries = 100
# category configs
cat2id = {
'ped_crossing': 0,
'divider': 1,
'boundary': 2,
}
num_class = max(list(cat2id.values())) + 1
# bev configs
roi_size = (100, 50) # bev range, 100m in x-axis, 50m in y-axis
bev_h = 50
bev_w = 100
pc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]
# vectorize params
coords_dim = 2
sample_dist = -1
sample_num = -1
simplify = True
# rasterize params (for temporal matching use)
canvas_size = (200, 100) # bev feature size
thickness = 3 # thickness of rasterized polylines
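# A quick consistency sketch (illustrative): canvas_size rasterizes the
# 100m x 50m BEV range at 0.5 m per pixel, since 100/200 = 50/100 = 0.5.
_raster_res = roi_size[0] / canvas_size[0]  # 0.5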
# meta info for submission pkl
meta = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False,
output_format='vector')
# model configs
bev_embed_dims = 256
embed_dims = 512
num_feat_levels = 3
norm_cfg = dict(type='BN2d')
num_class = max(list(cat2id.values()))+1
num_points = 20
permute = True
model = dict(
type='MapTracker',
roi_size=roi_size,
bev_h=bev_h,
bev_w=bev_w,
history_steps=4,
test_time_history_steps=20,
mem_select_dist_ranges=[1, 5, 10, 15],
skip_vector_head=True,
freeze_bev=False,
track_fp_aug=False,
use_memory=False,
mem_len=4,
mem_warmup_iters=500,
backbone_cfg=dict(
type='BEVFormerBackbone',
roi_size=roi_size,
bev_h=bev_h,
bev_w=bev_w,
use_grid_mask=True,
history_steps=4,
img_backbone=dict(
type='ResNet',
with_cp=False,
# pretrained='./resnet50_checkpoint.pth',
pretrained='open-mmlab://detectron2/resnet50_caffe',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe',
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, False, True, True)
),
img_neck=dict(
type='FPN',
in_channels=[512, 1024, 2048],
out_channels=bev_embed_dims,
start_level=0,
add_extra_convs=True,
num_outs=num_feat_levels,
norm_cfg=norm_cfg,
relu_before_extra_convs=True),
transformer=dict(
type='PerceptionTransformer',
embed_dims=bev_embed_dims,
num_cams=num_cams,
encoder=dict(
type='BEVFormerEncoder',
num_layers=2,
pc_range=pc_range,
num_points_in_pillar=4,
return_intermediate=False,
transformerlayers=dict(
type='BEVFormerLayer',
attn_cfgs=[
dict(
type='TemporalSelfAttention',
embed_dims=bev_embed_dims,
num_levels=1),
dict(
type='SpatialCrossAttention',
deformable_attention=dict(
type='MSDeformableAttention3D',
embed_dims=bev_embed_dims,
num_points=8,
num_levels=num_feat_levels),
embed_dims=bev_embed_dims,
num_cams=num_cams,
),
],
feedforward_channels=bev_embed_dims*2,
ffn_dropout=0.1,
operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
'ffn', 'norm')
)
),
),
positional_encoding=dict(
type='LearnedPositionalEncoding',
num_feats=bev_embed_dims//2,
row_num_embed=bev_h,
col_num_embed=bev_w,
),
),
head_cfg=dict(
type='MapDetectorHead',
num_queries=num_queries,
embed_dims=embed_dims,
num_classes=num_class,
in_channels=bev_embed_dims,
num_points=num_points,
roi_size=roi_size,
coord_dim=2,
different_heads=False,
predict_refine=False,
sync_cls_avg_factor=True,
trans_loss_weight=0.1,
transformer=dict(
type='MapTransformer',
num_feature_levels=1,
num_points=num_points,
coord_dim=2,
SYMBOL INDEX (552 symbols across 66 files)
FILE: plugin/core/apis/mmdet_train.py
class MyRunnerWrapper (line 33) | class MyRunnerWrapper(IterBasedRunner):
method train (line 34) | def train(self, data_loader, **kwargs):
function custom_train_detector (line 54) | def custom_train_detector(model,
FILE: plugin/core/apis/test.py
function custom_encode_mask_results (line 25) | def custom_encode_mask_results(mask_results):
function custom_multi_gpu_test (line 45) | def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=F...
function collect_results_cpu (line 112) | def collect_results_cpu(result_part, size, tmpdir=None):
function collect_results_gpu (line 160) | def collect_results_gpu(result_part, size):
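The two collect_results_* helpers gather per-rank predictions after distributed testing. A hedged sketch of the tmpdir-based CPU variant, assuming an initialized torch.distributed process group and a shared filesystem (names below are illustrative, not the repo's exact code):

import os, pickle
import torch.distributed as dist

def gather_results_cpu(result_part, size, tmpdir):
    # every rank dumps its share of the results to a shared directory
    rank, world_size = dist.get_rank(), dist.get_world_size()
    with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
        pickle.dump(result_part, f)
    dist.barrier()                       # wait until all parts are on disk
    if rank != 0:
        return None
    parts = []
    for i in range(world_size):
        with open(os.path.join(tmpdir, f'part_{i}.pkl'), 'rb') as f:
            parts.append(pickle.load(f))
    # the sampler deals samples out round-robin and pads ranks to equal
    # length, so re-interleave the parts and drop the padding
    ordered = [res for chunk in zip(*parts) for res in chunk]
    return ordered[:size]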
FILE: plugin/core/apis/train.py
function custom_train_model (line 14) | def custom_train_model(model,
function train_model (line 41) | def train_model(model,
FILE: plugin/core/evaluation/eval_hooks.py
function _calc_dynamic_intervals (line 17) | def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
class CustomDistEvalHook (line 29) | class CustomDistEvalHook(BaseDistEvalHook):
method __init__ (line 31) | def __init__(self, *args, dynamic_intervals=None, **kwargs):
method _decide_interval (line 38) | def _decide_interval(self, runner):
method before_train_epoch (line 45) | def before_train_epoch(self, runner):
method before_train_iter (line 50) | def before_train_iter(self, runner):
method _do_evaluate (line 54) | def _do_evaluate(self, runner):
FILE: plugin/datasets/argo_dataset.py
class AV2Dataset (line 15) | class AV2Dataset(BaseMapDataset):
method __init__ (line 30) | def __init__(self, **kwargs,):
method load_annotations (line 36) | def load_annotations(self, ann_file):
method load_matching (line 96) | def load_matching(self, matching_file):
method get_sample (line 107) | def get_sample(self, idx):
FILE: plugin/datasets/base_dataset.py
class BaseMapDataset (line 18) | class BaseMapDataset(Dataset):
method __init__ (line 32) | def __init__(self,
method _set_sequence_info (line 96) | def _set_sequence_info(self):
method _set_sequence_group_flag (line 117) | def _set_sequence_group_flag(self):
method load_annotations (line 161) | def load_annotations(self, ann_file):
method load_matching (line 164) | def load_matching(self, matching_file):
method get_sample (line 167) | def get_sample(self, idx):
method format_results (line 170) | def format_results(self, results, denormalize=True, prefix=None, save_...
method evaluate (line 298) | def evaluate(self, results, logger=None, **kwargs):
method _evaluate (line 319) | def _evaluate(self, result_path, logger=None, eval_semantic=False):
method show_gt (line 327) | def show_gt(self, idx, out_dir='demo/'):
method show_result (line 361) | def show_result(self, submission, idx, score_thr=0, draw_score=False, ...
method show_track (line 406) | def show_track(self, submission, idx, out_dir='demo/'):
method __len__ (line 429) | def __len__(self):
method _rand_another (line 437) | def _rand_another(self, idx):
method __getitem__ (line 445) | def __getitem__(self, idx):
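BaseMapDataset follows the usual mmdet-style loading loop: __getitem__ keeps resampling via _rand_another until a sample survives the pipeline. A minimal sketch of that pattern (the class and prepare_data below are stand-ins, not the repo's implementation):

import numpy as np

class RetryDataset:
    def __init__(self, samples):
        self.samples = samples

    def _rand_another(self, idx):
        # pick a random replacement index when a sample is unusable
        return np.random.randint(len(self.samples))

    def __getitem__(self, idx):
        while True:
            data = self.prepare_data(idx)
            if data is None:             # filtered or corrupted sample
                idx = self._rand_another(idx)
                continue
            return data

    def prepare_data(self, idx):
        # the real dataset runs the full transform pipeline here and
        # returns None on failure
        return self.samples[idx]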
FILE: plugin/datasets/builder.py
function build_dataloader (line 23) | def build_dataloader(dataset,
function worker_init_fn (line 121) | def worker_init_fn(worker_id, num_workers, rank, seed):
FILE: plugin/datasets/evaluation/AP.py
function average_precision (line 7) | def average_precision(recalls, precisions, mode='area'):
function instance_match (line 52) | def instance_match(pred_lines: NDArray,
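average_precision with mode='area' is the standard all-point interpolation. A sketch under that assumption (recalls/precisions are the cumulative curves sorted by descending score):

import numpy as np

def average_precision_area(recalls, precisions):
    # pad the curve, take the right-to-left running max of precision
    # (the precision envelope), then integrate it over recall
    mrec = np.concatenate(([0.0], recalls, [1.0]))
    mpre = np.concatenate(([0.0], precisions, [0.0]))
    for i in range(mpre.size - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return float(np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1]))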
FILE: plugin/datasets/evaluation/distance.py
function chamfer_distance (line 5) | def chamfer_distance(line1: NDArray, line2: NDArray) -> float:
function frechet_distance (line 23) | def frechet_distance(line1: NDArray, line2: NDArray) -> float:
function chamfer_distance_batch (line 37) | def chamfer_distance_batch(pred_lines, gt_lines):
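A sketch of the symmetric chamfer distance between two polylines, following one common convention (averaging the two directed terms); the repo's exact scaling may differ:

import numpy as np
from scipy.spatial.distance import cdist

def chamfer_distance(line1, line2):
    # pairwise distances between the two point sets, then the mean
    # nearest-neighbour distance in each direction
    d = cdist(line1, line2)
    return 0.5 * float(d.min(axis=1).mean() + d.min(axis=0).mean())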
FILE: plugin/datasets/evaluation/raster_eval.py
class RasterEvaluate (line 14) | class RasterEvaluate(object):
method __init__ (line 22) | def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS):
method gts (line 31) | def gts(self) -> Dict[str, NDArray]:
method evaluate (line 42) | def evaluate(self,
FILE: plugin/datasets/evaluation/vector_eval.py
class VectorEvaluate (line 24) | class VectorEvaluate(object):
method __init__ (line 32) | def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS) -> N...
method gts (line 45) | def gts(self) -> Dict[str, Dict[int, List[NDArray]]]:
method interp_fixed_num (line 77) | def interp_fixed_num(self,
method interp_fixed_dist (line 96) | def interp_fixed_dist(self,
method _evaluate_single (line 118) | def _evaluate_single(self,
method evaluate (line 169) | def evaluate(self,
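interp_fixed_num resamples a polyline to a fixed point count before matching. A minimal sketch with shapely (num_pts=20 is an assumed default):

import numpy as np
from shapely.geometry import LineString

def interp_fixed_num(line_pts, num_pts=20):
    # sample the curve at evenly spaced arc-length fractions
    ls = LineString(line_pts)
    dists = np.linspace(0.0, ls.length, num_pts)
    return np.stack([np.asarray(ls.interpolate(d).coords[0]) for d in dists])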
FILE: plugin/datasets/map_utils/av2map_extractor.py
class AV2MapExtractor (line 24) | class AV2MapExtractor(object):
method __init__ (line 31) | def __init__(self, roi_size: Union[Tuple, List], id2map: Dict) -> None:
method generate_nearby_dividers (line 38) | def generate_nearby_dividers(self,avm, e2g_translation, e2g_rotation,p...
method proc_polygon (line 175) | def proc_polygon(self,polygon, ego_SE3_city):
method proc_line (line 187) | def proc_line(self,line,ego_SE3_city):
method extract_local_divider (line 193) | def extract_local_divider(self,nearby_dividers, ego_SE3_city, patch_bo...
method extract_local_boundary (line 241) | def extract_local_boundary(self,avm, ego_SE3_city, patch_box, patch_an...
method get_scene_dividers (line 326) | def get_scene_dividers(self,avm,patch_box,patch_angle):
method get_scene_ped_crossings (line 354) | def get_scene_ped_crossings(self,avm,e2g_translation,e2g_rotation,poly...
method get_map_geom (line 391) | def get_map_geom(self,
FILE: plugin/datasets/map_utils/nuscmap_extractor.py
class NuscMapExtractor (line 17) | class NuscMapExtractor(object):
method __init__ (line 24) | def __init__(self, data_root: str, roi_size: Union[List, Tuple]) -> None:
method get_map_geom (line 36) | def get_map_geom(self,
class VectorizedLocalMap (line 57) | class VectorizedLocalMap(object):
method __init__ (line 65) | def __init__(self,
method gen_vectorized_samples (line 91) | def gen_vectorized_samples(self, lidar2global_translation, lidar2globa...
method get_centerline_geom (line 140) | def get_centerline_geom(self, patch_box, patch_angle, layer_names):
method get_map_geom (line 153) | def get_map_geom(self, patch_box, patch_angle, layer_names):
method get_divider_line (line 170) | def get_divider_line(self,patch_box,patch_angle,layer_name):
method get_contour_line (line 198) | def get_contour_line(self,patch_box,patch_angle,layer_name):
method get_ped_crossing_line (line 243) | def get_ped_crossing_line(self, patch_box, patch_angle):
method _union_ped_stmmapnet (line 266) | def _union_ped_stmmapnet(self, ped_geoms: List[Polygon]) -> List[Polyg...
method _handle_small_peds (line 329) | def _handle_small_peds(self, ped_geoms):
method get_ped_crossing_line_stmmapnet (line 386) | def get_ped_crossing_line_stmmapnet(self, patch_box, patch_angle):
method line_geoms_to_instances (line 428) | def line_geoms_to_instances(self, line_geom):
method _one_type_line_geom_to_instances (line 436) | def _one_type_line_geom_to_instances(self, line_geom):
method ped_poly_geoms_to_instances (line 450) | def ped_poly_geoms_to_instances(self, ped_geom):
method poly_geoms_to_instances (line 487) | def poly_geoms_to_instances(self, polygon_geom):
method centerline_geoms_to_instances (line 525) | def centerline_geoms_to_instances(self,geoms_dict):
method centerline_geoms2vec (line 532) | def centerline_geoms2vec(self, centerline_geoms_list):
method union_centerline (line 542) | def union_centerline(self, centerline_geoms):
class CNuScenesMapExplorer (line 608) | class CNuScenesMapExplorer(NuScenesMapExplorer):
method __ini__ (line 609) | def __ini__(self, *args, **kwargs):
method _get_centerline (line 612) | def _get_centerline(self,
function to_patch_coord (line 679) | def to_patch_coord(new_polygon, patch_angle, patch_x, patch_y):
FILE: plugin/datasets/map_utils/utils.py
function split_collections (line 9) | def split_collections(geom: BaseGeometry) -> List[Optional[BaseGeometry]]:
function get_drivable_area_contour (line 32) | def get_drivable_area_contour(drivable_areas: List[Polygon],
function get_ped_crossing_contour (line 89) | def get_ped_crossing_contour(polygon: Polygon,
function remove_repeated_lines (line 131) | def remove_repeated_lines(lines: List[LineString]) -> List[LineString]:
function remove_repeated_lanesegment (line 167) | def remove_repeated_lanesegment(lane_dict):
function reassign_graph_attribute (line 203) | def reassign_graph_attribute(lane_dict):
function remove_boundary_dividers (line 220) | def remove_boundary_dividers(dividers: List[LineString],
function connect_lines (line 248) | def connect_lines(lines: List[LineString]) -> List[LineString]:
function transform_from (line 299) | def transform_from(xyz: NDArray,
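transform_from applies a rigid transform to a point array; the one-liner it amounts to, assuming rotation is a 3x3 matrix and translation a 3-vector:

import numpy as np

def transform_from(xyz, rotation, translation):
    # x' = R @ x + t, vectorised over the N points in xyz (N, 3)
    return xyz @ np.asarray(rotation).T + np.asarray(translation)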
FILE: plugin/datasets/nusc_dataset.py
class NuscDataset (line 13) | class NuscDataset(BaseMapDataset):
method __init__ (line 28) | def __init__(self, data_root, **kwargs):
method load_annotations (line 33) | def load_annotations(self, ann_file):
method load_matching (line 50) | def load_matching(self, matching_file):
method get_sample (line 60) | def get_sample(self, idx):
FILE: plugin/datasets/pipelines/formating.py
class FormatBundleMap (line 9) | class FormatBundleMap(object):
method __init__ (line 20) | def __init__(self, process_img=True,
method __call__ (line 28) | def __call__(self, results):
method __repr__ (line 77) | def __repr__(self):
FILE: plugin/datasets/pipelines/loading.py
class LoadMultiViewImagesFromFiles (line 6) | class LoadMultiViewImagesFromFiles(object):
method __init__ (line 17) | def __init__(self, to_float32=False, color_type='unchanged'):
method __call__ (line 21) | def __call__(self, results):
method __repr__ (line 57) | def __repr__(self):
FILE: plugin/datasets/pipelines/rasterize.py
class RasterizeMap (line 14) | class RasterizeMap(object):
method __init__ (line 25) | def __init__(self,
method line_ego_to_mask (line 41) | def line_ego_to_mask(self,
method polygons_ego_to_mask (line 71) | def polygons_ego_to_mask(self,
method get_semantic_mask (line 102) | def get_semantic_mask(self, map_geoms: Dict) -> NDArray:
method __call__ (line 152) | def __call__(self, input_dict: Dict) -> Dict:
method __repr__ (line 159) | def __repr__(self):
class PV_Map (line 170) | class PV_Map(object):
method __init__ (line 181) | def __init__(self,
method perspective (line 203) | def perspective(self,cam_coords, proj_mat):
method get_valid_pix_coords (line 212) | def get_valid_pix_coords(pix_coords):
method line_ego_to_pvmask (line 218) | def line_ego_to_pvmask(self,
method lines_ego_to_pv (line 236) | def lines_ego_to_pv(self, lines_ego, pv_mask, ego2imgs, color=1, thick...
method get_pvmask_old (line 256) | def get_pvmask_old(self,map_geoms: Dict,ego2img: List, img_filenames: ...
method get_pvmask (line 287) | def get_pvmask(self, map_geoms: Dict,ego2img: List, img_filenames: Lis...
method __call__ (line 317) | def __call__(self, input_dict: Dict) -> Dict:
method __repr__ (line 323) | def __repr__(self):
method visualize_all_pv_masks (line 332) | def visualize_all_pv_masks(self, gt_pv_mask, img_filenames):
method _visualize_pv_mask (line 349) | def _visualize_pv_mask(self, pv_mask):
FILE: plugin/datasets/pipelines/transform.py
class Normalize3D (line 8) | class Normalize3D(object):
method __init__ (line 18) | def __init__(self, mean, std, to_rgb=True):
method __call__ (line 23) | def __call__(self, results):
method __repr__ (line 38) | def __repr__(self):
class PadMultiViewImages (line 45) | class PadMultiViewImages(object):
method __init__ (line 59) | def __init__(self, size=None, size_divisor=None, pad_val=0, change_int...
method _pad_img (line 69) | def _pad_img(self, results):
method __call__ (line 109) | def __call__(self, results):
method __repr__ (line 119) | def __repr__(self):
class ResizeMultiViewImages (line 130) | class ResizeMultiViewImages(object):
method __init__ (line 138) | def __init__(self, size=None, scale=None, change_intrinsics=True):
method __call__ (line 144) | def __call__(self, results:dict):
method __repr__ (line 184) | def __repr__(self):
class PhotoMetricDistortionMultiViewImage (line 193) | class PhotoMetricDistortionMultiViewImage:
method __init__ (line 212) | def __init__(self,
method __call__ (line 222) | def __call__(self, results):
method __repr__ (line 281) | def __repr__(self):
FILE: plugin/datasets/pipelines/vectorize.py
class VectorizeMap (line 8) | class VectorizeMap(object):
method __init__ (line 23) | def __init__(self,
method interp_fixed_num (line 49) | def interp_fixed_num(self, line: LineString) -> NDArray:
method interp_fixed_dist (line 65) | def interp_fixed_dist(self, line: LineString) -> NDArray:
method get_vectorized_lines (line 84) | def get_vectorized_lines(self, map_geoms: Dict) -> Dict:
method normalize_line (line 121) | def normalize_line(self, line: NDArray) -> NDArray:
method permute_line (line 141) | def permute_line(self, line: np.ndarray, padding=1e5):
method __call__ (line 176) | def __call__(self, input_dict):
method __repr__ (line 182) | def __repr__(self):
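normalize_line maps ego-frame coordinates into the unit square so the head can regress values in [0, 1]. A sketch, assuming the ROI is centred on the ego vehicle (the (60, 30) default is illustrative):

import numpy as np

def normalize_line(line, roi_size=(60, 30)):
    # shift the ROI's bottom-left corner to the origin, then scale
    origin = -np.asarray(roi_size, dtype=np.float32) / 2
    return (line - origin) / np.asarray(roi_size, dtype=np.float32)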
FILE: plugin/datasets/samplers/distributed_sampler.py
class DistributedSampler (line 15) | class DistributedSampler(_DistributedSampler):
method __init__ (line 17) | def __init__(self,
method __iter__ (line 52) | def __iter__(self):
FILE: plugin/datasets/samplers/group_sampler.py
class GroupSampler (line 19) | class GroupSampler(Sampler):
method __init__ (line 21) | def __init__(self, dataset, samples_per_gpu=1):
method __iter__ (line 34) | def __iter__(self):
method __len__ (line 42) | def __len__(self):
class DistributedGroupSampler (line 46) | class DistributedGroupSampler(Sampler):
method __init__ (line 64) | def __init__(self,
method __iter__ (line 93) | def __iter__(self):
method __len__ (line 136) | def __len__(self):
method set_epoch (line 139) | def set_epoch(self, epoch):
function sync_random_seed (line 143) | def sync_random_seed(seed=None, device='cuda'):
class InfiniteGroupEachSampleInBatchSampler (line 178) | class InfiniteGroupEachSampleInBatchSampler(Sampler):
method __init__ (line 199) | def __init__(self,
method _infinite_group_indices (line 256) | def _infinite_group_indices(self):
method _group_indices_per_global_sample_idx (line 262) | def _group_indices_per_global_sample_idx(self, global_sample_idx):
method _sample_sub_sequence (line 268) | def _sample_sub_sequence(self, group_idx):
method __iter__ (line 296) | def __iter__(self):
method __len__ (line 319) | def __len__(self):
method set_epoch (line 323) | def set_epoch(self, epoch):
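sync_random_seed keeps shuffling consistent across ranks by broadcasting one seed. A sketch of that pattern, assuming an initialized process group:

import numpy as np
import torch
import torch.distributed as dist

def sync_random_seed(seed=None, device='cuda'):
    # rank 0 draws the seed; every other rank receives it via broadcast
    if seed is None:
        seed = np.random.randint(2**31)
    if dist.is_available() and dist.is_initialized():
        t = torch.tensor(seed, dtype=torch.int32, device=device)
        dist.broadcast(t, src=0)
        seed = int(t.item())
    return seed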
FILE: plugin/datasets/samplers/sampler.py
function build_sampler (line 13) | def build_sampler(cfg, default_args):
FILE: plugin/datasets/visualize/renderer.py
function remove_nan_values (line 13) | def remove_nan_values(uv):
function points_ego2img (line 21) | def points_ego2img(pts_ego, extrinsics, intrinsics):
function draw_polyline_ego_on_img (line 32) | def draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsi...
function draw_visible_polyline_cv2 (line 61) | def draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickn...
class Renderer (line 111) | class Renderer(object):
method __init__ (line 120) | def __init__(self, cat2id, roi_size, dataset='av2'):
method render_bev_from_vectors (line 129) | def render_bev_from_vectors(self, vectors, out_dir, draw_scores=False,...
method render_camera_views_from_vectors (line 188) | def render_camera_views_from_vectors(self, vectors, imgs, extrinsics,
method render_bev_from_mask (line 220) | def render_bev_from_mask(self, semantic_mask, out_dir, flip=False):
FILE: plugin/models/assigner/assigner.py
class HungarianLinesAssigner (line 12) | class HungarianLinesAssigner(BaseAssigner):
method __init__ (line 31) | def __init__(self,
method assign (line 41) | def assign(self,
FILE: plugin/models/assigner/match_cost.py
function chamfer_distance (line 8) | def chamfer_distance(line1, line2) -> float:
class ClsSigmoidCost (line 28) | class ClsSigmoidCost:
method __init__ (line 34) | def __init__(self, weight=1.):
method __call__ (line 37) | def __call__(self, cls_pred, gt_labels):
class LinesFixNumChamferCost (line 56) | class LinesFixNumChamferCost(object):
method __init__ (line 62) | def __init__(self, weight=1.0, permute=False):
method __call__ (line 66) | def __call__(self, lines_pred, gt_lines):
class LinesL1Cost (line 110) | class LinesL1Cost(object):
method __init__ (line 116) | def __init__(self, weight=1.0, beta=0.0, permute=False):
method __call__ (line 121) | def __call__(self, lines_pred, gt_lines, **kwargs):
class BBoxCostC (line 165) | class BBoxCostC:
method __init__ (line 181) | def __init__(self, weight=1., box_format='xyxy'):
method __call__ (line 186) | def __call__(self, bbox_pred, gt_bboxes):
class IoUCostC (line 206) | class IoUCostC:
method __init__ (line 222) | def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):
method __call__ (line 228) | def __call__(self, bboxes, gt_bboxes):
class DynamicLinesCost (line 250) | class DynamicLinesCost(object):
method __init__ (line 256) | def __init__(self, weight=1.):
method __call__ (line 259) | def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):
method cal_dist (line 281) | def cal_dist(self, x1, x2):
method get_dynamic_line (line 298) | def get_dynamic_line(self, mat, m1, m2):
class BBoxLogitsCost (line 324) | class BBoxLogitsCost(object):
method __init__ (line 330) | def __init__(self, weight=1.):
method calNLL (line 333) | def calNLL(self, logits, value):
method __call__ (line 350) | def __call__(self, bbox_pred, bbox_gt, **kwargs):
class MapQueriesCost (line 365) | class MapQueriesCost(object):
method __init__ (line 367) | def __init__(self, cls_cost, reg_cost, iou_cost=None):
method __call__ (line 376) | def __call__(self, preds: dict, gts: dict):
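MapQueriesCost combines a classification term and a polyline regression term into one matrix for the Hungarian assigner. A hedged sketch of that combination (weights and the exact cost forms are illustrative):

import torch

def map_queries_cost(cls_pred, lines_pred, gt_labels, gt_lines,
                     cls_w=1.0, reg_w=5.0):
    # classification: negative predicted probability of each GT class
    cls_cost = -cls_pred.sigmoid()[:, gt_labels]           # (num_q, num_gt)
    # regression: mean L1 distance between flattened polylines
    reg_cost = torch.cdist(lines_pred.flatten(1),
                           gt_lines.flatten(1), p=1) / lines_pred.shape[1]
    return cls_w * cls_cost + reg_w * reg_cost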
FILE: plugin/models/backbones/bevformer/custom_base_transformer_layer.py
class MyCustomBaseTransformerLayer (line 38) | class MyCustomBaseTransformerLayer(BaseModule):
method __init__ (line 72) | def __init__(self,
method forward (line 165) | def forward(self,
class MyCustomBaseTransformerLayerWithoutSelfAttn (line 265) | class MyCustomBaseTransformerLayerWithoutSelfAttn(BaseModule):
method __init__ (line 299) | def __init__(self,
method forward (line 392) | def forward(self,
FILE: plugin/models/backbones/bevformer/encoder.py
class BEVFormerEncoder (line 27) | class BEVFormerEncoder(TransformerLayerSequence):
method __init__ (line 38) | def __init__(self, *args, pc_range=None, num_points_in_pillar=4, retur...
method get_reference_points (line 55) | def get_reference_points(H, W, Z=8, num_points_in_pillar=4, dim='3d', ...
method point_sampling (line 102) | def point_sampling(self, reference_points, pc_range, img_metas):
method forward (line 158) | def forward(self,
class BEVFormerLayer (line 259) | class BEVFormerLayer(MyCustomBaseTransformerLayer):
method __init__ (line 280) | def __init__(self,
method forward (line 303) | def forward(self,
FILE: plugin/models/backbones/bevformer/grid_mask.py
class Grid (line 7) | class Grid(object):
method __init__ (line 8) | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5...
method set_prob (line 18) | def set_prob(self, epoch, max_epoch):
method __call__ (line 21) | def __call__(self, img, label):
class GridMask (line 70) | class GridMask(nn.Module):
method __init__ (line 71) | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5...
method set_prob (line 83) | def set_prob(self, epoch, max_epoch):
method set_ratio_and_prob (line 86) | def set_ratio_and_prob(self, ratio, prob):
method forward (line 91) | def forward(self, x):
FILE: plugin/models/backbones/bevformer/multi_scale_deformable_attn_function.py
class MultiScaleDeformableAttnFunction_fp16 (line 15) | class MultiScaleDeformableAttnFunction_fp16(Function):
method forward (line 19) | def forward(ctx, value, value_spatial_shapes, value_level_start_index,
method backward (line 57) | def backward(ctx, grad_output):
class MultiScaleDeformableAttnFunction_fp32 (line 90) | class MultiScaleDeformableAttnFunction_fp32(Function):
method forward (line 94) | def forward(ctx, value, value_spatial_shapes, value_level_start_index,
method backward (line 133) | def backward(ctx, grad_output):
FILE: plugin/models/backbones/bevformer/spatial_cross_attention.py
class SpatialCrossAttention (line 31) | class SpatialCrossAttention(BaseModule):
method __init__ (line 44) | def __init__(self,
method init_weight (line 70) | def init_weight(self):
method forward (line 75) | def forward(self,
class MSDeformableAttention3D (line 178) | class MSDeformableAttention3D(BaseModule):
method __init__ (line 203) | def __init__(self,
method init_weights (line 252) | def init_weights(self):
method forward (line 272) | def forward(self,
class MSIPM3D (line 403) | class MSIPM3D(BaseModule):
method __init__ (line 428) | def __init__(self,
method init_weights (line 477) | def init_weights(self):
method forward (line 498) | def forward(self,
FILE: plugin/models/backbones/bevformer/temporal_net.py
class MyResBlock (line 12) | class MyResBlock(nn.Module):
method __init__ (line 13) | def __init__(self,
method forward (line 31) | def forward(self, x: Tensor) -> Tensor:
class TemporalNet (line 48) | class TemporalNet(nn.Module):
method __init__ (line 49) | def __init__(self, history_steps, hidden_dims, num_blocks):
method init_weights (line 67) | def init_weights(self):
method forward (line 73) | def forward(self, history_feats, curr_feat):
FILE: plugin/models/backbones/bevformer/temporal_self_attention.py
class TemporalSelfAttention (line 25) | class TemporalSelfAttention(BaseModule):
method __init__ (line 54) | def __init__(self,
method init_weights (line 106) | def init_weights(self):
method forward (line 127) | def forward(self,
FILE: plugin/models/backbones/bevformer/transformer.py
class PerceptionTransformer (line 26) | class PerceptionTransformer(BaseModule):
method __init__ (line 37) | def __init__(self,
method init_layers (line 56) | def init_layers(self):
method init_weights (line 64) | def init_weights(self):
method get_bev_features (line 80) | def get_bev_features(
method forward (line 149) | def forward(self,
FILE: plugin/models/backbones/bevformer_backbone.py
class UpsampleBlock (line 15) | class UpsampleBlock(nn.Module):
method __init__ (line 16) | def __init__(self, ins, outs):
method init_weights (line 23) | def init_weights(self):
method forward (line 28) | def forward(self, x):
method upsample2x (line 36) | def upsample2x(self, x):
class BEVFormerBackbone (line 43) | class BEVFormerBackbone(nn.Module):
method __init__ (line 55) | def __init__(self,
method _init_layers (line 107) | def _init_layers(self):
method init_weights (line 113) | def init_weights(self):
method extract_img_feat (line 123) | def extract_img_feat(self, img, img_metas, len_queue=None):
method forward (line 159) | def forward(self, img, img_metas, timestep, history_bev_feats, history...
FILE: plugin/models/heads/MapDetectorHead.py
class MapDetectorHead (line 18) | class MapDetectorHead(nn.Module):
method __init__ (line 20) | def __init__(self,
method init_weights (line 81) | def init_weights(self):
method _init_embedding (line 115) | def _init_embedding(self):
method _init_branch (line 129) | def _init_branch(self,):
method _prepare_context (line 162) | def _prepare_context(self, bev_features):
method forward_train (line 175) | def forward_train(self, bev_features, img_metas, gts, track_query_info...
method forward_test (line 275) | def forward_test(self, bev_features, img_metas, track_query_info=None,...
method _get_target_single (line 359) | def _get_target_single(self,
method get_targets (line 443) | def get_targets(self, preds, gts, track_info=None, gt_bboxes_ignore_li...
method loss_single (line 507) | def loss_single(self,
method loss (line 593) | def loss(self,
method post_process (line 644) | def post_process(self, preds_dict, tokens, track_dict=None, thr=0.0):
method prepare_temporal_propagation (line 709) | def prepare_temporal_propagation(self, preds_dict, scene_name, local_i...
method get_track_info (line 785) | def get_track_info(self, scene_name, local_idx):
method get_self_iter_track_query (line 801) | def get_self_iter_track_query(self, preds_dict):
method clear_temporal_cache (line 821) | def clear_temporal_cache(self):
method train (line 824) | def train(self, *args, **kwargs):
method eval (line 827) | def eval(self):
method forward (line 830) | def forward(self, *args, return_loss=True, **kwargs):
FILE: plugin/models/heads/MapSegHead.py
class MapSegHead (line 17) | class MapSegHead(nn.Module):
method __init__ (line 19) | def __init__(self,
method init_weights (line 65) | def init_weights(self):
method forward_train (line 71) | def forward_train(self, bev_features, gts, history_coords):
method forward_test (line 87) | def forward_test(self, bev_features):
method train (line 97) | def train(self, *args, **kwargs):
method eval (line 100) | def eval(self):
method forward (line 103) | def forward(self, *args, return_loss=True, **kwargs):
FILE: plugin/models/heads/base_map_head.py
class BaseMapHead (line 10) | class BaseMapHead(nn.Module, metaclass=ABCMeta):
method __init__ (line 13) | def __init__(self):
method init_weights (line 17) | def init_weights(self, pretrained=None):
method forward (line 28) | def forward(self, *args, **kwargs):
method loss (line 32) | def loss(self, pred, gt):
method post_process (line 47) | def post_process(self, pred):
FILE: plugin/models/losses/detr_loss.py
class LinesL1Loss (line 12) | class LinesL1Loss(nn.Module):
method __init__ (line 14) | def __init__(self, reduction='mean', loss_weight=1.0, beta=0.5):
method forward (line 28) | def forward(self,
function bce (line 69) | def bce(pred, label, class_weight=None):
class MasksLoss (line 86) | class MasksLoss(nn.Module):
method __init__ (line 88) | def __init__(self, reduction='mean', loss_weight=1.0):
method forward (line 93) | def forward(self,
function ce (line 114) | def ce(pred, label, class_weight=None):
class LenLoss (line 130) | class LenLoss(nn.Module):
method __init__ (line 132) | def __init__(self, reduction='mean', loss_weight=1.0):
method forward (line 137) | def forward(self,
FILE: plugin/models/losses/seg_loss.py
function py_sigmoid_focal_loss (line 12) | def py_sigmoid_focal_loss(pred,
class MaskFocalLoss (line 61) | class MaskFocalLoss(FocalLoss):
method __init__ (line 62) | def __init__(self,**kwargs):
method forward (line 65) | def forward(self,
class MaskDiceLoss (line 94) | class MaskDiceLoss(nn.Module):
method __init__ (line 106) | def __init__(self, loss_weight):
method forward (line 111) | def forward(self, pred, target):
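MaskDiceLoss is the usual soft dice over rasterized masks. A minimal sketch:

import torch

def dice_loss(pred, target, eps=1.0):
    # soft dice: 1 - 2|X.Y| / (|X| + |Y|), averaged over the batch
    pred = pred.sigmoid().flatten(1)
    target = target.flatten(1).float()
    inter = (pred * target).sum(-1)
    denom = pred.sum(-1) + target.sum(-1)
    return (1 - (2 * inter + eps) / (denom + eps)).mean()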
FILE: plugin/models/mapers/MapTracker.py
class MapTracker (line 22) | class MapTracker(BaseMapper):
method __init__ (line 24) | def __init__(self,
method init_weights (line 111) | def init_weights(self, pretrained=None):
method temporal_propagate (line 124) | def temporal_propagate(self, curr_bev_feats, img_metas, all_history_cu...
method add_noise_to_pose (line 280) | def add_noise_to_pose(self, rot, trans):
method process_history_info (line 294) | def process_history_info(self, img_metas, history_img_metas):
method forward_train (line 344) | def forward_train(self, img, vectors, semantic_mask, points=None, img_...
method forward_test (line 587) | def forward_test(self, img, points=None, img_metas=None, seq_info=None...
method batch_data (line 694) | def batch_data(self, vectors, imgs, img_metas, device, points=None):
method get_two_frame_matching (line 741) | def get_two_frame_matching(self, local2global_prev, local2global_curr,...
method _compute_cur2prev (line 766) | def _compute_cur2prev(self, gt2local_curr, gt2local_prev, local2gt_prev,
method _reverse_id_mapping (line 786) | def _reverse_id_mapping(self, id_mapping):
method prepare_track_queries_and_targets (line 793) | def prepare_track_queries_and_targets(self, gts, prev_inds_list, prev_...
method _batchify_tracks (line 909) | def _batchify_tracks(self, targets):
method train (line 926) | def train(self, *args, **kwargs):
method eval (line 935) | def eval(self):
method _freeze_bev (line 938) | def _freeze_bev(self,):
method _unfreeze_bev (line 946) | def _unfreeze_bev(self,):
method _denorm_lines (line 954) | def _denorm_lines(self, line_pts):
method _norm_lines (line 962) | def _norm_lines(self, line_pts):
method _process_track_query_info (line 970) | def _process_track_query_info(self, track_info):
method select_memory_entries (line 987) | def select_memory_entries(self, history_metas, curr_meta):
method _viz_temporal_supervision (line 1026) | def _viz_temporal_supervision(self, outputs_prev, all_track_info, gts,...
FILE: plugin/models/mapers/base_mapper.py
class BaseMapper (line 12) | class BaseMapper(nn.Module, metaclass=ABCMeta):
method __init__ (line 15) | def __init__(self):
method with_neck (line 20) | def with_neck(self):
method with_shared_head (line 27) | def with_shared_head(self):
method with_bbox (line 32) | def with_bbox(self):
method with_mask (line 38) | def with_mask(self):
method extract_feat (line 44) | def extract_feat(self, imgs):
method forward_train (line 48) | def forward_train(self, *args, **kwargs):
method simple_test (line 52) | def simple_test(self, img, img_metas, **kwargs):
method aug_test (line 56) | def aug_test(self, imgs, img_metas, **kwargs):
method init_weights (line 60) | def init_weights(self, pretrained=None):
method forward_test (line 71) | def forward_test(self, *args, **kwargs):
method forward (line 81) | def forward(self, *args, return_loss=True, **kwargs):
method train_step (line 97) | def train_step(self, data_dict, optimizer):
method val_step (line 131) | def val_step(self, data, optimizer):
method show_result (line 145) | def show_result(self,
FILE: plugin/models/mapers/vector_memory.py
function get_emb (line 9) | def get_emb(sin_inp):
class PositionalEncoding1D (line 17) | class PositionalEncoding1D(nn.Module):
method __init__ (line 18) | def __init__(self, channels):
method forward (line 30) | def forward(self, tensor):
class VectorInstanceMemory (line 53) | class VectorInstanceMemory(nn.Module):
method __init__ (line 55) | def __init__(self,
method set_bank_size (line 73) | def set_bank_size(self, bank_size):
method init_memory (line 76) | def init_memory(self, bs):
method update_memory (line 97) | def update_memory(self, batch_i, is_first_frame, propagated_ids, prev_...
method prepare_transformation_batch (line 158) | def prepare_transformation_batch(self,history_e2g_trans,history_e2g_ro...
method clear_dict (line 180) | def clear_dict(self,):
method trans_memory_bank (line 185) | def trans_memory_bank(self, query_prop, b_i, metas):
method add_noise_to_pose (line 270) | def add_noise_to_pose(self, rot, trans):
method select_memory_entries (line 284) | def select_memory_entries(self, mem_trans, curr_meta):
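PositionalEncoding1D gives each memory slot a fixed sinusoidal code. A sketch of the standard construction (concatenating the sin and cos halves rather than interleaving them, for brevity):

import torch

def positional_encoding_1d(length, channels):
    # one (length, channels) table of sin/cos features at geometric frequencies
    half = channels // 2
    inv_freq = 1.0 / (10000 ** (torch.arange(half, dtype=torch.float32) / half))
    pos = torch.arange(length, dtype=torch.float32)
    sin_inp = torch.einsum('i,j->ij', pos, inv_freq)
    return torch.cat([sin_inp.sin(), sin_inp.cos()], dim=-1)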
FILE: plugin/models/necks/gru.py
class ConvGRU (line 8) | class ConvGRU(nn.Module):
method __init__ (line 9) | def __init__(self, out_channels):
method init_weights (line 23) | def init_weights(self):
method forward (line 30) | def forward(self, h, x):
FILE: plugin/models/transformer_utils/CustomMSDeformableAttention.py
class CustomMSDeformableAttention (line 32) | class CustomMSDeformableAttention(BaseModule):
method __init__ (line 59) | def __init__(self,
method init_weights (line 111) | def init_weights(self):
method forward (line 134) | def forward(self,
FILE: plugin/models/transformer_utils/MapTransformer.py
class MapTransformerDecoder_new (line 25) | class MapTransformerDecoder_new(BaseModule):
method __init__ (line 33) | def __init__(self,
method forward (line 58) | def forward(self,
class MapTransformerLayer (line 137) | class MapTransformerLayer(BaseTransformerLayer):
method __init__ (line 173) | def __init__(self,
method forward (line 199) | def forward(self,
class MapTransformer (line 346) | class MapTransformer(Transformer):
method __init__ (line 357) | def __init__(self,
method init_layers (line 369) | def init_layers(self):
method init_weights (line 374) | def init_weights(self):
method forward (line 383) | def forward(self,
FILE: plugin/models/transformer_utils/base_transformer.py
class PlaceHolderEncoder (line 17) | class PlaceHolderEncoder(nn.Module):
method __init__ (line 19) | def __init__(self, *args, embed_dims=None, **kwargs):
method forward (line 23) | def forward(self, *args, query=None, **kwargs):
FILE: plugin/models/transformer_utils/deformable_transformer.py
function inverse_sigmoid (line 30) | def inverse_sigmoid(x, eps=1e-5):
class DeformableDetrTransformerDecoder_ (line 48) | class DeformableDetrTransformerDecoder_(TransformerLayerSequence):
method __init__ (line 56) | def __init__(self, *args,
method forward (line 64) | def forward(self,
class DeformableDetrTransformer_ (line 130) | class DeformableDetrTransformer_(Transformer):
method __init__ (line 141) | def __init__(self,
method init_layers (line 155) | def init_layers(self):
method init_weights (line 169) | def init_weights(self):
method get_reference_points (line 184) | def get_reference_points(spatial_shapes, valid_ratios, device):
method get_valid_ratio (line 216) | def get_valid_ratio(self, mask):
method get_proposal_pos_embed (line 226) | def get_proposal_pos_embed(self,
method forward (line 244) | def forward(self,
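inverse_sigmoid is the numerically clamped logit used to move reference points back into logit space between decoder layers; the standard form:

import torch

def inverse_sigmoid(x, eps=1e-5):
    # clamp both x and 1-x away from zero before taking the log-ratio
    x = x.clamp(min=0, max=1)
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
    return torch.log(x1 / x2)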
FILE: plugin/models/transformer_utils/fp16_dattn.py
class MultiScaleDeformableAttentionFp16 (line 36) | class MultiScaleDeformableAttentionFp16(BaseModule):
method __init__ (line 38) | def __init__(self, attn_cfg=None,init_cfg=None,**kwarg):
method forward (line 48) | def forward(self, query,
class MultiScaleDeformableAttnFunctionFp32 (line 71) | class MultiScaleDeformableAttnFunctionFp32(Function):
method forward (line 75) | def forward(ctx, value, value_spatial_shapes, value_level_start_index,
method backward (line 112) | def backward(ctx, grad_output):
function multi_scale_deformable_attn_pytorch (line 143) | def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
class MultiScaleDeformableAttentionFP32 (line 202) | class MultiScaleDeformableAttentionFP32(BaseModule):
method __init__ (line 227) | def __init__(self,
method init_weights (line 275) | def init_weights(self):
method forward (line 297) | def forward(self,
FILE: plugin/models/utils/query_update.py
class Embedder (line 8) | class Embedder:
method __init__ (line 9) | def __init__(self, **kwargs):
method create_embedding_fn (line 13) | def create_embedding_fn(self):
method embed (line 37) | def embed(self, inputs):
class MotionMLP (line 41) | class MotionMLP(nn.Module):
method __init__ (line 48) | def __init__(self, c_dim, f_dim=512, identity=True):
method init_weights (line 73) | def init_weights(self):
method forward (line 80) | def forward(self, x, pose_info):
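MotionMLP updates propagated queries with the ego-motion between frames. A hedged sketch of the identity-residual idea (dimensions and layer sizes are illustrative, not the repo's exact module):

import torch
import torch.nn as nn

class MotionMLPSketch(nn.Module):
    def __init__(self, c_dim, f_dim=512):
        super().__init__()
        # fuse the query feature with an embedded pose delta of size c_dim
        self.mlp = nn.Sequential(
            nn.Linear(f_dim + c_dim, f_dim),
            nn.ReLU(inplace=True),
            nn.Linear(f_dim, f_dim))

    def forward(self, x, pose_info):
        # residual form keeps the update near the identity early in training
        return x + self.mlp(torch.cat([x, pose_info], dim=-1))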
FILE: plugin/models/utils/renderer_track.py
function remove_nan_values (line 10) | def remove_nan_values(uv):
function points_ego2img (line 18) | def points_ego2img(pts_ego, extrinsics, intrinsics):
function draw_polyline_ego_on_img (line 29) | def draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsi...
function draw_visible_polyline_cv2 (line 58) | def draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickn...
class Renderer (line 108) | class Renderer(object):
method __init__ (line 117) | def __init__(self, cat2id, roi_size, dataset='av2'):
method render_bev_from_vectors (line 126) | def render_bev_from_vectors(self, vectors, labels, out_path, id_info=N...
method render_bev_from_mask (line 174) | def render_bev_from_mask(self, semantic_mask, out_path):
FILE: tools/benchmark.py
function parse_args (line 13) | def parse_args():
function main (line 29) | def main():
FILE: tools/data_converter/argoverse_converter.py
function parse_args (line 29) | def parse_args():
function create_av2_infos_mp (line 47) | def create_av2_infos_mp(root_path,
function get_data_from_logid (line 135) | def get_data_from_logid(log_id, loaders, data_root):
FILE: tools/data_converter/nuscenes_converter.py
function parse_args (line 19) | def parse_args():
function create_nuscenes_infos_map (line 36) | def create_nuscenes_infos_map(root_path,
FILE: tools/mmdet_test.py
function single_gpu_test (line 16) | def single_gpu_test(model,
function multi_gpu_test (line 69) | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
function collect_results_cpu (line 117) | def collect_results_cpu(result_part, size, tmpdir=None):
function collect_results_gpu (line 160) | def collect_results_gpu(result_part, size):
FILE: tools/mmdet_train.py
function set_random_seed (line 18) | def set_random_seed(seed, deterministic=False):
function train_detector (line 37) | def train_detector(model,
FILE: tools/test.py
function parse_args (line 21) | def parse_args():
function main (line 103) | def main():
FILE: tools/tracking/calculate_cmap.py
function parse_args (line 53) | def parse_args():
function instance_match (line 79) | def instance_match(pred_lines, scores, gt_lines, threshold, metric='cham...
function _evaluate_single (line 119) | def _evaluate_single(pred_vectors, scores, gt_vectors, threshold, metric...
function match_gt_w_pred (line 135) | def match_gt_w_pred(curr_data,curr_data_gt,thresh):
function get_scene_matching_result (line 170) | def get_scene_matching_result(gts,pred_results,scene_name2token,scene_na...
function pred2gt_global_matching (line 207) | def pred2gt_global_matching(ids_info,ids_info_gt,pred2gt_seq):
function get_tpfp_from_scene_single (line 232) | def get_tpfp_from_scene_single(scene_name,args,scene_name2token,pred_res...
function get_mAP (line 281) | def get_mAP(tpfp_score_record,num_gts,threshold):
function main (line 306) | def main():
FILE: tools/tracking/cmap_utils/data_utils.py
function get_gts (line 18) | def get_gts(dataset,new_split=False,N_WORKERS=16):
function prepare_data_multi (line 55) | def prepare_data_multi(token,idx,pred,gts,origin,roi_size,interp_num,dat...
function get_data (line 84) | def get_data(pred_matching_result_raw,gts,origin,roi_size,num_interp,res...
FILE: tools/tracking/cmap_utils/match_utils.py
function get_prev2curr_matrix (line 13) | def get_prev2curr_matrix(prev_meta,curr_meta):
function find_matchings_iou (line 32) | def find_matchings_iou(src_masks, tgt_masks, thresh=0.1):
function find_matchings_chamfer (line 69) | def find_matchings_chamfer(pred_vectors, gt_vectors, score_dict,thresh=0...
function get_consecutive_vectors (line 101) | def get_consecutive_vectors(prev_vectors,curr_vectors,prev2curr_matrix,o...
function filter_vectors (line 136) | def filter_vectors(data_info, origin,roi_size,thr,num_interp=20):
FILE: tools/tracking/cmap_utils/utils.py
function import_plugin (line 8) | def import_plugin(cfg):
function draw_polylines (line 33) | def draw_polylines(vecs, roi_size, origin, cfg):
function draw_polygons (line 47) | def draw_polygons(vecs, roi_size, origin, cfg):
function draw_instance_masks (line 65) | def draw_instance_masks(vectors, roi_size, origin, cfg):
function interp_fixed_num (line 78) | def interp_fixed_num(vector, num_pts):
function chamfer_distance_batch (line 87) | def chamfer_distance_batch(pred_lines, gt_lines):
function average_precision (line 110) | def average_precision(recalls, precisions, mode='area'):
FILE: tools/tracking/prepare_gt_tracks.py
function parse_args (line 27) | def parse_args():
function import_plugin (line 48) | def import_plugin(cfg):
function draw_polylines (line 76) | def draw_polylines(vecs, roi_size, origin, cfg):
function draw_polygons (line 91) | def draw_polygons(vecs, roi_size, origin, cfg):
function draw_instance_masks (line 109) | def draw_instance_masks(vectors, roi_size, origin, cfg):
function _mask_iou (line 119) | def _mask_iou(mask1, mask2):
function find_matchings (line 127) | def find_matchings(src_masks, tgt_masks, thresh=0.1):
function match_two_consecutive_frames (line 157) | def match_two_consecutive_frames(prev_data, curr_data, roi_size, origin,...
function assign_global_ids (line 238) | def assign_global_ids(matchings_seq, vectors_seq):
function _denorm (line 274) | def _denorm(vectors, roi_size, origin):
function form_gt_track_single (line 281) | def form_gt_track_single(scene_name, scene_name2idx, dataset, out_dir, c...
function main (line 341) | def main():
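_mask_iou and find_matchings associate instances across consecutive frames via rasterized-mask overlap. The IoU itself is the standard binary-mask ratio:

import numpy as np

def mask_iou(mask1, mask2):
    # intersection-over-union of two boolean instance masks
    inter = np.logical_and(mask1, mask2).sum()
    union = np.logical_or(mask1, mask2).sum()
    return inter / union if union > 0 else 0.0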
FILE: tools/tracking/prepare_pred_tracks.py
function parse_args (line 32) | def parse_args():
function match_two_consecutive_frames_pred (line 59) | def match_two_consecutive_frames_pred(args,prev_data,prev_meta, curr_da...
function collect_pred (line 76) | def collect_pred(data,thr):
function get_scene_matching_result (line 86) | def get_scene_matching_result(args,cfg,pred_results,dataset,origin,roi_s...
function generate_results (line 176) | def generate_results(ids_info,vectors_seq,scores_seq,meta_list,scene_name):
function get_matching_single (line 204) | def get_matching_single(scene_name,args,scene_name2idx,dataset,cfg,pred_...
function main (line 213) | def main():
FILE: tools/train.py
function parse_args (line 30) | def parse_args():
function main (line 99) | def main():
FILE: tools/visualization/vis_global.py
function parse_args (line 28) | def parse_args():
function combine_images_with_labels (line 94) | def combine_images_with_labels(image_paths, labels, output_path, font_sc...
function merge_corssing (line 126) | def merge_corssing(polylines):
function find_largest_convex_hull (line 131) | def find_largest_convex_hull(polylines):
function project_point_onto_line (line 152) | def project_point_onto_line(point, line):
function find_nearest_projection_on_polyline (line 166) | def find_nearest_projection_on_polyline(point, polyline):
function find_and_sort_intersections (line 180) | def find_and_sort_intersections(segmenet1, segment2):
function get_intersection_point_on_line (line 213) | def get_intersection_point_on_line(line, intersection):
function merge_l2_points_to_l1 (line 231) | def merge_l2_points_to_l1(line1, line2, line2_intersect_start, line2_int...
function segment_line_based_on_merged_area (line 261) | def segment_line_based_on_merged_area(line, merged_points):
function get_bbox_size_for_points (line 296) | def get_bbox_size_for_points(points):
function get_longer_segmenent_to_merged_points (line 312) | def get_longer_segmenent_to_merged_points(l1_segment, l2_segment, merged...
function get_line_lineList_max_intersection (line 385) | def get_line_lineList_max_intersection(merged_lines, line, thickness=4):
function algin_l2_with_l1 (line 401) | def algin_l2_with_l1(line1, line2):
function _is_u_shape (line 455) | def _is_u_shape(line, direction):
function check_circle (line 473) | def check_circle(pre_line, vec):
function connect_polygon (line 488) | def connect_polygon(merged_polyline, merged_lines):
function iou_merge_boundry (line 506) | def iou_merge_boundry(merged_lines, vec, thickness=1):
function iou_merge_divider (line 588) | def iou_merge_divider(merged_lines, vec, thickness=1):
function merge_divider (line 628) | def merge_divider(vecs=None, thickness=1):
function merge_boundary (line 657) | def merge_boundary(vecs=None, thickness=1, iou_threshold=0.95):
function get_consecutive_vectors_with_opt (line 688) | def get_consecutive_vectors_with_opt(prev_vectors=None,prev2curr_matrix=...
function get_prev2curr_vectors (line 720) | def get_prev2curr_vectors(vecs=None, prev2curr_matrix=None,origin=None,r...
function plot_fig_merged_per_frame (line 744) | def plot_fig_merged_per_frame(num_frames, car_trajectory, x_min, x_max, ...
function plot_fig_merged (line 885) | def plot_fig_merged(car_trajectory, x_min, x_max, y_min, y_max, pred_sav...
function plot_fig_unmerged_per_frame (line 975) | def plot_fig_unmerged_per_frame(num_frames, car_trajectory, x_min, x_max...
function plot_fig_unmerged (line 1046) | def plot_fig_unmerged(car_trajectory, x_min, x_max, y_min, y_max, pred_s...
function save_t (line 1111) | def save_t(t_max, main_save_folder):
function save_as_video (line 1132) | def save_as_video(image_list, mp4_output_path, scale=None):
function vis_pred_data (line 1150) | def vis_pred_data(scene_name="", pred_results=None, origin=None, roi_siz...
function vis_gt_data (line 1247) | def vis_gt_data(scene_name, args, dataset, gt_data, origin, roi_size):
function main (line 1355) | def main():
FILE: tools/visualization/vis_per_frame.py
function parse_args (line 23) | def parse_args():
function save_as_video (line 70) | def save_as_video(image_list, mp4_output_path, scale=None):
function plot_one_frame_results (line 85) | def plot_one_frame_results(vectors, id_info, roi_size, scene_dir, args):
function vis_pred_data (line 142) | def vis_pred_data(scene_name, args, pred_results, origin,roi_size):
function vis_gt_data (line 206) | def vis_gt_data(scene_name, args, dataset, scene_name2idx, gt_data, orig...
function main (line 249) | def main():
Condensed preview — 161 files, each showing path, character count, and a content snippet (1,200K chars in the full structured content).
[
{
"path": ".gitignore",
"chars": 1604,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n*.ipynb\n\n# C extensions\n*.so\n\n# Distribution /"
},
{
"path": "LICENSE",
"chars": 290,
"preview": "The code, data, and model weights in this repository are not allowed for commercial usage. For research purposes, the te"
},
{
"path": "LICENSE_GPL",
"chars": 35148,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "README.md",
"chars": 5482,
"preview": "<div align=\"center\">\n<h2 align=\"center\"> MapTracker: Tracking with Strided Memory Fusion for <br/> Consistent Vector HD "
},
{
"path": "docs/data_preparation.md",
"chars": 3565,
"preview": "\n# Data Preparation\n\nCompared to the data preparation procedure of StreamMapNet or MapTR, we have one more step to gener"
},
{
"path": "docs/getting_started.md",
"chars": 5624,
"preview": "# Getting started with MapTracker\n\nIn this document, we provide the commands for running inference/evaluation, training,"
},
{
"path": "docs/installation.md",
"chars": 783,
"preview": "# Environment Setup\n\nWe use the same environment as StreamMapNet and the environment setup is largely borrowed from thei"
},
{
"path": "plugin/__init__.py",
"chars": 45,
"preview": "from .models import *\nfrom .datasets import *"
},
{
"path": "plugin/configs/_base_/datasets/coco_instance.py",
"chars": 1718,
"preview": "dataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.3"
},
{
"path": "plugin/configs/_base_/datasets/kitti-3d-3class.py",
"chars": 4597,
"preview": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car'"
},
{
"path": "plugin/configs/_base_/datasets/kitti-3d-car.py",
"chars": 4503,
"preview": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\npoint_cloud_range = [0,"
},
{
"path": "plugin/configs/_base_/datasets/lyft-3d.py",
"chars": 4568,
"preview": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range"
},
{
"path": "plugin/configs/_base_/datasets/nuim_instance.py",
"chars": 1991,
"preview": "dataset_type = 'CocoDataset'\ndata_root = 'data/nuimages/'\nclass_names = [\n 'car', 'truck', 'trailer', 'bus', 'constru"
},
{
"path": "plugin/configs/_base_/datasets/nus-3d.py",
"chars": 4915,
"preview": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range"
},
{
"path": "plugin/configs/_base_/datasets/nus-mono3d.py",
"chars": 3230,
"preview": "dataset_type = 'NuScenesMonoDataset'\ndata_root = 'data/nuscenes/'\nclass_names = [\n 'car', 'truck', 'trailer', 'bus', "
},
{
"path": "plugin/configs/_base_/datasets/range100_lyft-3d.py",
"chars": 4572,
"preview": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range"
},
{
"path": "plugin/configs/_base_/datasets/s3dis_seg-3d-13class.py",
"chars": 4323,
"preview": "# dataset settings\ndataset_type = 'S3DISSegDataset'\ndata_root = './data/s3dis/'\nclass_names = ('ceiling', 'floor', 'wall"
},
{
"path": "plugin/configs/_base_/datasets/scannet-3d-18class.py",
"chars": 4049,
"preview": "# dataset settings\ndataset_type = 'ScanNetDataset'\ndata_root = './data/scannet/'\nclass_names = ('cabinet', 'bed', 'chair"
},
{
"path": "plugin/configs/_base_/datasets/scannet_seg-3d-20class.py",
"chars": 4253,
"preview": "# dataset settings\ndataset_type = 'ScanNetSegDataset'\ndata_root = './data/scannet/'\nclass_names = ('wall', 'floor', 'cab"
},
{
"path": "plugin/configs/_base_/datasets/sunrgbd-3d-10class.py",
"chars": 3301,
"preview": "dataset_type = 'SUNRGBDDataset'\ndata_root = 'data/sunrgbd/'\nclass_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'd"
},
{
"path": "plugin/configs/_base_/datasets/waymoD5-3d-3class.py",
"chars": 4779,
"preview": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for ef"
},
{
"path": "plugin/configs/_base_/datasets/waymoD5-3d-car.py",
"chars": 4683,
"preview": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for ef"
},
{
"path": "plugin/configs/_base_/default_runtime.py",
"chars": 485,
"preview": "checkpoint_config = dict(interval=1)\n# yapf:disable push\n# By default we use textlogger hook and tensorboard\n# For more "
},
{
"path": "plugin/configs/_base_/models/3dssd.py",
"chars": 3085,
"preview": "model = dict(\n type='SSD3DNet',\n backbone=dict(\n type='PointNet2SAMSG',\n in_channels=4,\n num_"
},
{
"path": "plugin/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py",
"chars": 6976,
"preview": "# model settings\nmodel = dict(\n type='CascadeRCNN',\n pretrained='torchvision://resnet50',\n backbone=dict(\n "
},
{
"path": "plugin/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py",
"chars": 3275,
"preview": "voxel_size = [0.1, 0.1, 0.2]\nmodel = dict(\n type='CenterPoint',\n pts_voxel_layer=dict(\n max_num_points=10, "
},
{
"path": "plugin/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py",
"chars": 3183,
"preview": "voxel_size = [0.2, 0.2, 8]\nmodel = dict(\n type='CenterPoint',\n pts_voxel_layer=dict(\n max_num_points=20, vo"
},
{
"path": "plugin/configs/_base_/models/fcos3d.py",
"chars": 2279,
"preview": "model = dict(\n type='FCOSMono3D',\n pretrained='open-mmlab://detectron2/resnet101_caffe',\n backbone=dict(\n "
},
{
"path": "plugin/configs/_base_/models/groupfree3d.py",
"chars": 2593,
"preview": "model = dict(\n type='GroupFree3DNet',\n backbone=dict(\n type='PointNet2SASSG',\n in_channels=3,\n "
},
{
"path": "plugin/configs/_base_/models/h3dnet.py",
"chars": 10970,
"preview": "primitive_z_cfg = dict(\n type='PrimitiveHead',\n num_dims=2,\n num_classes=18,\n primitive_mode='z',\n upper_"
},
{
"path": "plugin/configs/_base_/models/hv_pointpillars_fpn_lyft.py",
"chars": 963,
"preview": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel enco"
},
{
"path": "plugin/configs/_base_/models/hv_pointpillars_fpn_nus.py",
"chars": 3314,
"preview": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n"
},
{
"path": "plugin/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py",
"chars": 975,
"preview": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel enco"
},
{
"path": "plugin/configs/_base_/models/hv_pointpillars_secfpn_kitti.py",
"chars": 3136,
"preview": "voxel_size = [0.16, 0.16, 4]\n\nmodel = dict(\n type='VoxelNet',\n voxel_layer=dict(\n max_num_points=32, # max"
},
{
"path": "plugin/configs/_base_/models/hv_pointpillars_secfpn_waymo.py",
"chars": 3987,
"preview": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n"
},
{
"path": "plugin/configs/_base_/models/hv_second_secfpn_kitti.py",
"chars": 2909,
"preview": "voxel_size = [0.05, 0.05, 0.1]\n\nmodel = dict(\n type='VoxelNet',\n voxel_layer=dict(\n max_num_points=5,\n "
},
{
"path": "plugin/configs/_base_/models/hv_second_secfpn_waymo.py",
"chars": 3503,
"preview": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n"
},
{
"path": "plugin/configs/_base_/models/imvotenet_image.py",
"chars": 3531,
"preview": "model = dict(\n type='ImVoteNet',\n img_backbone=dict(\n type='ResNet',\n depth=50,\n num_stages=4"
},
{
"path": "plugin/configs/_base_/models/mask_rcnn_r50_fpn.py",
"chars": 4080,
"preview": "# model settings\nmodel = dict(\n type='MaskRCNN',\n pretrained='torchvision://resnet50',\n backbone=dict(\n "
},
{
"path": "plugin/configs/_base_/models/paconv_cuda_ssg.py",
"chars": 180,
"preview": "_base_ = './paconv_ssg.py'\n\nmodel = dict(\n backbone=dict(\n sa_cfg=dict(\n type='PAConvCUDASAModule',"
},
{
"path": "plugin/configs/_base_/models/paconv_ssg.py",
"chars": 2005,
"preview": "# model settings\nmodel = dict(\n type='EncoderDecoder3D',\n backbone=dict(\n type='PointNet2SASSG',\n in"
},
{
"path": "plugin/configs/_base_/models/parta2.py",
"chars": 7012,
"preview": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n type='P"
},
{
"path": "plugin/configs/_base_/models/pointnet2_msg.py",
"chars": 1223,
"preview": "_base_ = './pointnet2_ssg.py'\n\n# model settings\nmodel = dict(\n backbone=dict(\n _delete_=True,\n type='Po"
},
{
"path": "plugin/configs/_base_/models/pointnet2_ssg.py",
"chars": 1266,
"preview": "# model settings\nmodel = dict(\n type='EncoderDecoder3D',\n backbone=dict(\n type='PointNet2SASSG',\n in"
},
{
"path": "plugin/configs/_base_/models/votenet.py",
"chars": 2576,
"preview": "model = dict(\n type='VoteNet',\n backbone=dict(\n type='PointNet2SASSG',\n in_channels=4,\n num_p"
},
{
"path": "plugin/configs/_base_/schedules/cosine.py",
"chars": 536,
"preview": "# This schedule is mainly used by models with dynamic voxelization\n# optimizer\nlr = 0.003 # max learning rate\noptimizer"
},
{
"path": "plugin/configs/_base_/schedules/cyclic_20e.py",
"chars": 797,
"preview": "# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epoch"
},
{
"path": "plugin/configs/_base_/schedules/cyclic_40e.py",
"chars": 1572,
"preview": "# The schedule is usually used by models trained on KITTI dataset\n\n# The learning rate set in the cyclic schedule is the"
},
{
"path": "plugin/configs/_base_/schedules/mmdet_schedule_1x.py",
"chars": 319,
"preview": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=N"
},
{
"path": "plugin/configs/_base_/schedules/schedule_2x.py",
"chars": 459,
"preview": "# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=0.001, weight"
},
{
"path": "plugin/configs/_base_/schedules/schedule_3x.py",
"chars": 399,
"preview": "# optimizer\n# This schedule is mainly used by models on indoor dataset,\n# e.g., VoteNet on SUNRGBD and ScanNet\nlr = 0.00"
},
{
"path": "plugin/configs/_base_/schedules/seg_cosine_150e.py",
"chars": 361,
"preview": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='SGD', lr=0.2, we"
},
{
"path": "plugin/configs/_base_/schedules/seg_cosine_200e.py",
"chars": 349,
"preview": "# optimizer\n# This schedule is mainly used on ScanNet dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.00"
},
{
"path": "plugin/configs/_base_/schedules/seg_cosine_50e.py",
"chars": 347,
"preview": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.001,"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage1_bev_pretrain.py",
"chars": 13555,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage2_warmup.py",
"chars": 13562,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage3_joint_finetune.py",
"chars": 13572,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage1_bev_pretrain.py",
"chars": 13553,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage2_warmup.py",
"chars": 13553,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage3_joint_finetune.py",
"chars": 13563,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage1_bev_pretrain.py",
"chars": 13480,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage2_warmup.py",
"chars": 13759,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage3_joint_finetune.py",
"chars": 13518,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage1_bev_pretrain.py",
"chars": 13784,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage2_warmup.py",
"chars": 13641,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage3_joint_finetune.py",
"chars": 13824,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain.py",
"chars": 13876,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage2_warmup.py",
"chars": 13575,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py",
"chars": 13719,
"preview": "_base_ = [\n '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin"
},
{
"path": "plugin/core/apis/__init__.py",
"chars": 126,
"preview": "from .train import custom_train_model\nfrom .mmdet_train import custom_train_detector\n# from .test import custom_multi_gp"
},
{
"path": "plugin/core/apis/mmdet_train.py",
"chars": 8885,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/core/apis/test.py",
"chars": 6103,
"preview": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# --------------------"
},
{
"path": "plugin/core/apis/train.py",
"chars": 2038,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/core/evaluation/__init__.py",
"chars": 42,
"preview": "from .eval_hooks import CustomDistEvalHook"
},
{
"path": "plugin/core/evaluation/eval_hooks.py",
"chars": 3489,
"preview": "\n# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,\n# in order to avoid strong version dependenc"
},
{
"path": "plugin/datasets/__init__.py",
"chars": 100,
"preview": "from .pipelines import *\nfrom .argo_dataset import AV2Dataset\nfrom .nusc_dataset import NuscDataset\n"
},
{
"path": "plugin/datasets/argo_dataset.py",
"chars": 6075,
"preview": "from .base_dataset import BaseMapDataset\nfrom .map_utils.av2map_extractor import AV2MapExtractor\nfrom mmdet.datasets imp"
},
{
"path": "plugin/datasets/base_dataset.py",
"chars": 19629,
"preview": "import numpy as np\nimport os\nimport os.path as osp\nimport mmcv\nfrom .evaluation.raster_eval import RasterEvaluate\nfrom ."
},
{
"path": "plugin/datasets/builder.py",
"chars": 7195,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/datasets/evaluation/AP.py",
"chars": 4587,
"preview": "import numpy as np\nfrom .distance import chamfer_distance, frechet_distance, chamfer_distance_batch\nfrom typing import L"
},
{
"path": "plugin/datasets/evaluation/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "plugin/datasets/evaluation/distance.py",
"chars": 2143,
"preview": "from scipy.spatial import distance\nfrom numpy.typing import NDArray\nimport torch\n\ndef chamfer_distance(line1: NDArray, l"
},
{
"path": "plugin/datasets/evaluation/raster_eval.py",
"chars": 3559,
"preview": "import torch\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport mmcv\nfrom functools import cached_prope"
},
{
"path": "plugin/datasets/evaluation/vector_eval.py",
"chars": 11458,
"preview": "from functools import partial\nimport numpy as np\nfrom multiprocessing import Pool\nfrom mmdet3d.datasets import build_dat"
},
{
"path": "plugin/datasets/map_utils/av2map_extractor.py",
"chars": 18689,
"preview": "from av2.map.map_api import ArgoverseStaticMap\nfrom pathlib import Path\nfrom shapely.geometry import LineString, box, Po"
},
{
"path": "plugin/datasets/map_utils/nuscmap_extractor.py",
"chars": 30521,
"preview": "from shapely.geometry import LineString, box, Polygon\nfrom shapely import ops, strtree\n\nimport numpy as np\nfrom nuscenes"
},
{
"path": "plugin/datasets/map_utils/utils.py",
"chars": 10177,
"preview": "from shapely.geometry import LineString, box, Polygon, LinearRing\nfrom shapely.geometry.base import BaseGeometry\nfrom sh"
},
{
"path": "plugin/datasets/nusc_dataset.py",
"chars": 5919,
"preview": "from.base_dataset import BaseMapDataset\nfrom .map_utils.nuscmap_extractor import NuscMapExtractor\nfrom mmdet.datasets im"
},
{
"path": "plugin/datasets/pipelines/__init__.py",
"chars": 503,
"preview": "from .loading import LoadMultiViewImagesFromFiles\nfrom .formating import FormatBundleMap\nfrom .transform import ResizeMu"
},
{
"path": "plugin/datasets/pipelines/formating.py",
"chars": 3250,
"preview": "import numpy as np\nfrom mmcv.parallel import DataContainer as DC\n\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet."
},
{
"path": "plugin/datasets/pipelines/loading.py",
"chars": 2466,
"preview": "import mmcv\nimport numpy as np\nfrom mmdet.datasets.builder import PIPELINES\n\n@PIPELINES.register_module(force=True)\nclas"
},
{
"path": "plugin/datasets/pipelines/rasterize.py",
"chars": 14736,
"preview": "import numpy as np\nfrom mmdet.datasets.builder import PIPELINES\nfrom shapely.geometry import LineString, Polygon\nfrom sh"
},
{
"path": "plugin/datasets/pipelines/transform.py",
"chars": 11063,
"preview": "import numpy as np\nimport mmcv\n\nfrom mmdet.datasets.builder import PIPELINES\nfrom numpy import random\n\n@PIPELINES.regist"
},
{
"path": "plugin/datasets/pipelines/vectorize.py",
"chars": 6988,
"preview": "import numpy as np\nfrom mmdet.datasets.builder import PIPELINES\nfrom shapely.geometry import LineString\nfrom numpy.typin"
},
{
"path": "plugin/datasets/samplers/__init__.py",
"chars": 187,
"preview": "from .group_sampler import DistributedGroupSampler, InfiniteGroupEachSampleInBatchSampler\nfrom .distributed_sampler impo"
},
{
"path": "plugin/datasets/samplers/distributed_sampler.py",
"chars": 2922,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/datasets/samplers/group_sampler.py",
"chars": 13030,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/datasets/samplers/sampler.py",
"chars": 487,
"preview": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# --------------------"
},
{
"path": "plugin/datasets/visualize/renderer.py",
"chars": 9956,
"preview": "import os.path as osp\nimport os\nimport av2.geometry.interpolate as interp_utils\nimport numpy as np\nimport copy\nimport cv"
},
{
"path": "plugin/models/__init__.py",
"chars": 188,
"preview": "from .backbones import *\nfrom .heads import *\nfrom .necks import *\nfrom .losses import *\nfrom .mapers import *\nfrom .tra"
},
{
"path": "plugin/models/assigner/__init__.py",
"chars": 192,
"preview": "from .assigner import HungarianLinesAssigner\nfrom .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, I"
},
{
"path": "plugin/models/assigner/assigner.py",
"chars": 6947,
"preview": "import torch\n\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult\nfrom"
},
{
"path": "plugin/models/assigner/match_cost.py",
"chars": 13147,
"preview": "import torch\nfrom mmdet.core.bbox.match_costs.builder import MATCH_COST\nfrom mmdet.core.bbox.match_costs import build_ma"
},
{
"path": "plugin/models/backbones/__init__.py",
"chars": 50,
"preview": "from .bevformer_backbone import BEVFormerBackbone\n"
},
{
"path": "plugin/models/backbones/bevformer/__init__.py",
"chars": 346,
"preview": "from .custom_base_transformer_layer import MyCustomBaseTransformerLayer\nfrom .encoder import BEVFormerEncoder\nfrom .spat"
},
{
"path": "plugin/models/backbones/bevformer/custom_base_transformer_layer.py",
"chars": 21094,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/models/backbones/bevformer/encoder.py",
"chars": 17392,
"preview": "\"\"\"\nBorrowed from StreamMapNet, and add BEV memory fusion\n\"\"\"\n\nfrom .custom_base_transformer_layer import MyCustomBaseTr"
},
{
"path": "plugin/models/backbones/bevformer/grid_mask.py",
"chars": 4043,
"preview": "import torch\nimport torch.nn as nn\nimport numpy as np\nfrom PIL import Image\nfrom mmcv.runner import force_fp32, auto_fp1"
},
{
"path": "plugin/models/backbones/bevformer/multi_scale_deformable_attn_function.py",
"chars": 6120,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/models/backbones/bevformer/spatial_cross_attention.py",
"chars": 27867,
"preview": "\n# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------"
},
{
"path": "plugin/models/backbones/bevformer/temporal_net.py",
"chars": 2463,
"preview": "import torch\nimport torch.nn as nn\nfrom typing import Optional, Sequence, Tuple, Union\nfrom mmdet.models import NECKS\nfr"
},
{
"path": "plugin/models/backbones/bevformer/temporal_self_attention.py",
"chars": 11833,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/models/backbones/bevformer/transformer.py",
"chars": 7736,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/models/backbones/bevformer_backbone.py",
"chars": 8878,
"preview": "import copy\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.models import BACKBONES\nfrom "
},
{
"path": "plugin/models/heads/MapDetectorHead.py",
"chars": 35418,
"preview": "import copy\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, Linear, buil"
},
{
"path": "plugin/models/heads/MapSegHead.py",
"chars": 3606,
"preview": "import copy\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import C"
},
{
"path": "plugin/models/heads/__init__.py",
"chars": 80,
"preview": "from .MapDetectorHead import MapDetectorHead\nfrom .MapSegHead import MapSegHead\n"
},
{
"path": "plugin/models/heads/base_map_head.py",
"chars": 1366,
"preview": "from abc import ABCMeta, abstractmethod\n\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16\nfrom mmcv.utils import "
},
{
"path": "plugin/models/losses/__init__.py",
"chars": 104,
"preview": "from .detr_loss import LinesL1Loss, MasksLoss, LenLoss\nfrom .seg_loss import MaskFocalLoss, MaskDiceLoss"
},
{
"path": "plugin/models/losses/detr_loss.py",
"chars": 4562,
"preview": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nfrom mmdet.models.losses import l1_loss, sm"
},
{
"path": "plugin/models/losses/seg_loss.py",
"chars": 4741,
"preview": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nimport mmcv\n\nfrom mmdet.models.builder impo"
},
{
"path": "plugin/models/mapers/MapTracker.py",
"chars": 51964,
"preview": "\"\"\"\n MapTracker main module, adapted from StreamMapNet\n\"\"\"\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nfro"
},
{
"path": "plugin/models/mapers/__init__.py",
"chars": 34,
"preview": "from .MapTracker import MapTracker"
},
{
"path": "plugin/models/mapers/base_mapper.py",
"chars": 5140,
"preview": "from abc import ABCMeta, abstractmethod\n\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16\nfrom mmcv.utils import "
},
{
"path": "plugin/models/mapers/vector_memory.py",
"chars": 15530,
"preview": "import torch\nfrom torch import nn\n\nfrom einops import repeat, rearrange\nfrom scipy.spatial.transform import Rotation as "
},
{
"path": "plugin/models/necks/__init__.py",
"chars": 24,
"preview": "from .gru import ConvGRU"
},
{
"path": "plugin/models/necks/gru.py",
"chars": 1685,
"preview": "import torch\nimport torch.nn as nn\nfrom mmdet.models import NECKS\nfrom mmcv.cnn.utils import kaiming_init, constant_init"
},
{
"path": "plugin/models/transformer_utils/CustomMSDeformableAttention.py",
"chars": 10603,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "plugin/models/transformer_utils/MapTransformer.py",
"chars": 20333,
"preview": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\nimport copy\n\nimport torch\nimport torch.nn as"
},
{
"path": "plugin/models/transformer_utils/__init__.py",
"chars": 306,
"preview": "from .deformable_transformer import DeformableDetrTransformer_, DeformableDetrTransformerDecoder_\nfrom .base_transformer"
},
{
"path": "plugin/models/transformer_utils/base_transformer.py",
"chars": 876,
"preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmcv.cnn import xavier_init,"
},
{
"path": "plugin/models/transformer_utils/deformable_transformer.py",
"chars": 15647,
"preview": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmc"
},
{
"path": "plugin/models/transformer_utils/fp16_dattn.py",
"chars": 17056,
"preview": "from turtle import forward\nimport warnings\ntry:\n from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAtt"
},
{
"path": "plugin/models/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "plugin/models/utils/query_update.py",
"chars": 2557,
"preview": "import math\nimport torch\nimport torch.nn as nn \nimport numpy as np\nfrom mmcv.cnn import bias_init_with_prob, xavier_init"
},
{
"path": "plugin/models/utils/renderer_track.py",
"chars": 7477,
"preview": "import os.path as osp\nimport os\n#import av2.geometry.interpolate as interp_utils\nimport numpy as np\nimport copy\nimport c"
},
{
"path": "requirements.txt",
"chars": 131,
"preview": "av2\nnuscenes-devkit\neinops==0.6.1\nnumpy==1.23.5\nnumba==0.53.0\nShapely==1.8.5\nyapf==0.40.1\nsetuptools==59.5.0\nimageio-ffm"
},
{
"path": "tools/benchmark.py",
"chars": 4629,
"preview": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport time\nimport torch\nfrom mmcv import Config\nfrom mm"
},
{
"path": "tools/data_converter/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tools/data_converter/argoverse_converter.py",
"chars": 7270,
"preview": "from functools import partial\nfrom multiprocessing import Pool\nimport multiprocessing\nfrom random import sample\nimport t"
},
{
"path": "tools/data_converter/av2_train_split.txt",
"chars": 27713,
"preview": "bb110668-5037-3c04-bd34-34cf1ace8d0f\n8beeb8db-28f9-396c-b752-17f906505948\n247f91e7-3177-33ad-b99e-0e0a4dc76751\n40bfcbec-"
},
{
"path": "tools/data_converter/av2_val_split.txt",
"chars": 5513,
"preview": "22dcf96c-ef5e-376b-9db5-dc9f91040f5e\n5b1d8b11-4f90-3577-be0b-193e102fda82\n3f9796e9-c892-3915-b719-3292df878ece\nb5a7ff7e-"
},
{
"path": "tools/data_converter/nusc_split.py",
"chars": 13499,
"preview": "TRAIN_SCENES = [\n \"scene-0002\", \"scene-0003\", \"scene-0004\", \"scene-0005\", \"scene-0006\", \n \"scene-0007\", \"scene-000"
},
{
"path": "tools/data_converter/nuscenes_converter.py",
"chars": 7067,
"preview": "import mmcv\nimport numpy as np\nfrom os import path as osp\nfrom pyquaternion import Quaternion\nimport argparse\nfrom nusc_"
},
{
"path": "tools/dist_test.sh",
"chars": 272,
"preview": "#!/usr/bin/env bash\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\nPORT=${PORT:-29500}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npy"
},
{
"path": "tools/dist_train.sh",
"chars": 247,
"preview": "#!/usr/bin/env bash\n\nCONFIG=$1\nGPUS=$2\nPORT=${PORT:-29500}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch."
},
{
"path": "tools/mmdet_test.py",
"chars": 6832,
"preview": "import os.path as osp\nimport pickle\nimport shutil\nimport tempfile\nimport time\n\nimport mmcv\nimport torch\nimport torch.dis"
},
{
"path": "tools/mmdet_train.py",
"chars": 6336,
"preview": "import random\nimport warnings\n\nimport numpy as np\nimport torch\nfrom mmcv.parallel import MMDataParallel, MMDistributedDa"
},
{
"path": "tools/slurm_test.sh",
"chars": 566,
"preview": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_N"
},
{
"path": "tools/slurm_train.sh",
"chars": 574,
"preview": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NOD"
},
{
"path": "tools/test.py",
"chars": 10274,
"preview": "import argparse\nimport mmcv\nimport os\nimport os.path as osp\nimport torch\nimport warnings\nfrom mmcv import Config, DictAc"
},
{
"path": "tools/tracking/calculate_cmap.py",
"chars": 13510,
"preview": "import argparse\nfrom mmcv import Config\nfrom mmdet3d.datasets import build_dataset\nimport cv2\nimport torch\nimport numpy "
},
{
"path": "tools/tracking/cmap_utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tools/tracking/cmap_utils/data_utils.py",
"chars": 3866,
"preview": "import mmcv\nimport os\nfrom mmdet3d.datasets import build_dataloader\nimport numpy as np\nfrom copy import deepcopy\nfrom fu"
},
{
"path": "tools/tracking/cmap_utils/match_utils.py",
"chars": 6054,
"preview": "import torch\nimport numpy as np\nfrom scipy.optimize import linear_sum_assignment\n\nfrom .utils import *\n\ncat2id = {\n '"
},
{
"path": "tools/tracking/cmap_utils/utils.py",
"chars": 5105,
"preview": "import cv2\nfrom PIL import Image, ImageDraw\nimport os\nimport torch\nimport numpy as np\nfrom shapely.geometry import LineS"
},
{
"path": "tools/tracking/prepare_gt_tracks.py",
"chars": 15092,
"preview": "import argparse\nimport mmcv\nfrom mmcv import Config\nimport os\nfrom mmdet3d.datasets import build_dataset, build_dataload"
},
{
"path": "tools/tracking/prepare_pred_tracks.py",
"chars": 10501,
"preview": "import argparse\nimport mmcv\nfrom mmcv import Config\nimport os\nfrom mmdet3d.datasets import build_dataset\nimport cv2\nimpo"
},
{
"path": "tools/train.py",
"chars": 10535,
"preview": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ----------------------"
},
{
"path": "tools/visualization/vis_global.py",
"chars": 62045,
"preview": "import sys\nimport os\nSCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.dirname(SCRIPT_DIR)"
},
{
"path": "tools/visualization/vis_per_frame.py",
"chars": 10761,
"preview": "import sys\nimport os\nSCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.dirname(SCRIPT_DIR)"
}
]
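The file manifest above is a JSON array in which each entry records a file's path, its size in characters (chars), and a short preview of its contents. A minimal sketch of how a script might consume it, assuming the array has been saved to its own standalone file (the name manifest.json is hypothetical):

import json
from collections import defaultdict

# Load the manifest array shown above; "manifest.json" is a hypothetical
# filename -- the array would need to be saved to its own file first.
with open("manifest.json") as f:
    manifest = json.load(f)

# Group files by top-level directory and sum character counts, largest first.
chars_by_dir = defaultdict(int)
for entry in manifest:
    top_level = entry["path"].split("/")[0]
    chars_by_dir[top_level] += entry["chars"]

for directory, chars in sorted(chars_by_dir.items(), key=lambda kv: -kv[1]):
    print(f"{directory:<20} {chars:>9,} chars")

# Look up a single file's metadata by path.
by_path = {entry["path"]: entry for entry in manifest}
print(by_path["plugin/models/mapers/MapTracker.py"]["preview"])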
About this extraction
This document contains the full source code of the woodfrog/maptracker GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction covers 161 files (1.1 MB, approximately 292.7k tokens) and includes a symbol index of 552 extracted functions, classes, methods, constants, and types.
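The maptracker config paths in the manifest also encode the model's three-stage training pipeline: for each dataset/split combination there is a stage1_bev_pretrain, a stage2_warmup, and a stage3_joint_finetune config. A small sketch, reusing the manifest list loaded above, that collects the stage configs for one split in training order (the default split name is just an example):

import re

def stage_configs(manifest, split="nuscenes_oldsplit"):
    # Select the MapTracker configs for one dataset/split and sort them by
    # the stage number embedded in the filename (stage1 -> stage2 -> stage3).
    prefix = f"plugin/configs/maptracker/{split}/"
    paths = [e["path"] for e in manifest if e["path"].startswith(prefix)]
    return sorted(paths, key=lambda p: int(re.search(r"stage(\d)", p).group(1)))

for config in stage_configs(manifest):
    print(config)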